Fix overrun in decl_initializer_alloc.
[tinycc/kirr.git] / x86_64-gen.c
blobf3382aecc2820f5a263d3c32767b2fb172141ae9
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <assert.h>
25 /* number of available registers */
26 #define NB_REGS 5
/* NB_REGS is also the length of reg_classes[] below. */
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see gv2() code which does
30 assumptions on it). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
33 #define RC_RAX 0x0004
34 #define RC_RCX 0x0008
35 #define RC_RDX 0x0010
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
43 enum {
44 TREG_RAX = 0,
45 TREG_RCX = 1,
46 TREG_RDX = 2,
/* TREG_RSI .. TREG_R11 have values >= NB_REGS, so they have no entry
   in reg_classes[]; in this file they are only named explicitly
   (arg_regs[], the indirect call in gcall_or_jmp()). */
47 TREG_RSI = 6,
48 TREG_RDI = 7,
49 TREG_R8 = 8,
50 TREG_R9 = 9,
51 TREG_R10 = 10,
52 TREG_R11 = 11,
54 TREG_XMM0 = 3,
55 TREG_ST0 = 4,
/* REX_BASE: bit 3 (and above) of a register number, the part that is
   merged into the REX prefix byte by the emitters below.
   REG_VALUE: the low three bits of a register number, the part that
   fits in an opcode or ModRM field. */
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
/* register classes of each register, indexed by TREG_* value
   (0 .. NB_REGS-1) */
61 int reg_classes[NB_REGS] = {
62 /* eax */ RC_INT | RC_RAX,
63 /* ecx */ RC_INT | RC_RCX,
64 /* edx */ RC_INT | RC_RDX,
65 /* xmm0 */ RC_FLOAT | RC_XMM0,
66 /* st0 */ RC_ST0,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
78 #define PTR_SIZE 8
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
84 #define MAX_ALIGN 8
86 /******************************************************/
87 /* ELF defines */
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
/* text offset just past the bytes reserved for the prolog; set by
   gfunc_prolog() and used by gfunc_epilog() to find where the prolog
   must be written */
101 static unsigned long func_sub_sp_offset;
/* operand of the 'ret n' emitted by gfunc_epilog(); 0 selects a plain
   'ret'. gfunc_prolog() resets it to 0. */
102 static int func_ret_sub;
104 /* XXX: make it faster ? */
105 void g(int c)
107 int ind1;
108 ind1 = ind + 1;
109 if (ind1 > cur_text_section->data_allocated)
110 section_realloc(cur_text_section, ind1);
111 cur_text_section->data[ind] = c;
112 ind = ind1;
/* Emit the bytes of 'c' through g(), least significant first, stopping
   as soon as the remaining value is zero (so o(0) emits nothing). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
/* Emit 'c' as four bytes, least significant byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
/* Emit 'c' as eight bytes, least significant byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
143 /* output a symbol and patch all calls to it */
144 void gsym_addr(int t, int a)
146 int n, *ptr;
147 while (t) {
148 ptr = (int *)(cur_text_section->data + t);
149 n = *ptr; /* next value */
150 *ptr = a - t - 4;
151 t = n;
155 void gsym(int t)
157 gsym_addr(t, ind);
160 /* psym is used to emit an instruction whose 32-bit data field is a
   reference to a symbol. It is simply another name for oad(): same
   arguments, same return value (the offset of the data field). */
162 #define psym oad
164 static int is64_type(int t)
166 return ((t & VT_BTYPE) == VT_PTR ||
167 (t & VT_BTYPE) == VT_FUNC ||
168 (t & VT_BTYPE) == VT_LLONG);
171 static int is_sse_float(int t) {
172 int bt;
173 bt = t & VT_BTYPE;
174 return bt == VT_DOUBLE || bt == VT_FLOAT;
177 /* instruction + 4 bytes data. Return the address of the data */
178 static int oad(int c, int s)
180 int ind1;
182 o(c);
183 ind1 = ind + 4;
184 if (ind1 > cur_text_section->data_allocated)
185 section_realloc(cur_text_section, ind1);
186 *(int *)(cur_text_section->data + ind) = s;
187 s = ind;
188 ind = ind1;
189 return s;
192 /* output constant with relocation if 'r & VT_SYM' is true */
193 static void gen_addr64(int r, Sym *sym, int64_t c)
195 if (r & VT_SYM)
196 greloc(cur_text_section, sym, ind, R_X86_64_64);
197 gen_le64(c);
200 /* output constant with relocation if 'r & VT_SYM' is true */
201 static void gen_addrpc32(int r, Sym *sym, int c)
203 if (r & VT_SYM)
204 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
205 gen_le32(c-4);
208 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
209 opcode bits */
210 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
212 op_reg = op_reg << 3;
213 if ((r & VT_VALMASK) == VT_CONST) {
214 /* constant memory reference */
215 o(0x05 | op_reg);
216 gen_addrpc32(r, sym, c);
217 } else if ((r & VT_VALMASK) == VT_LOCAL) {
218 /* currently, we use only ebp as base */
219 if (c == (char)c) {
220 /* short reference */
221 o(0x45 | op_reg);
222 g(c);
223 } else {
224 oad(0x85 | op_reg, c);
226 } else {
227 g(0x00 | op_reg | (r & VT_VALMASK));
231 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
232 opcode bits */
233 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
235 int rex = 0x48 | (REX_BASE(op_reg) << 2);
236 if ((r & VT_VALMASK) != VT_CONST &&
237 (r & VT_VALMASK) != VT_LOCAL) {
238 rex |= REX_BASE(VT_VALMASK & r);
240 o(rex);
241 o(opcode);
242 op_reg = REG_VALUE(op_reg) << 3;
243 if ((r & VT_VALMASK) == VT_CONST) {
244 /* constant memory reference */
245 o(0x05 | op_reg);
246 gen_addrpc32(r, sym, c);
247 } else if ((r & VT_VALMASK) == VT_LOCAL) {
248 /* currently, we use only ebp as base */
249 if (c == (char)c) {
250 /* short reference */
251 o(0x45 | op_reg);
252 g(c);
253 } else {
254 oad(0x85 | op_reg, c);
256 } else {
257 g(0x00 | op_reg | (r & VT_VALMASK));
262 /* load 'r' from value 'sv' */
/* Emits the code that brings the value described by 'sv' into
   register 'r'.
   - If 'sv' is an lvalue, a memory read is emitted; the move opcode is
     chosen from the type of 'sv'.
   - Otherwise the value is materialised: constant, address of a local,
     result of a comparison (VT_CMP), result of a jump chain
     (VT_JMP/VT_JMPI), or a copy from another register. */
263 void load(int r, SValue *sv)
265 int v, t, ft, fc, fr;
266 SValue v1;
268 fr = sv->r;
269 ft = sv->type.t;
270 fc = sv->c.ul;
272 v = fr & VT_VALMASK;
273 if (fr & VT_LVAL) {
274 if (v == VT_LLOCAL) {
/* the address itself lives in a local slot: load it into 'r' as a
   pointer first, then read through 'r' below */
275 v1.type.t = VT_PTR;
276 v1.r = VT_LOCAL | VT_LVAL;
277 v1.c.ul = fc;
278 load(r, &v1);
279 fr = r;
/* for float/double the ModRM reg field is forced to 0; for long
   double it is 5, which gen_modrm() places in the opcode-extension
   bits of the ModRM byte */
281 if ((ft & VT_BTYPE) == VT_FLOAT) {
282 o(0x6e0f66); /* movd */
283 r = 0;
284 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
285 o(0x7e0ff3); /* movq */
286 r = 0;
287 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
288 o(0xdb); /* fldt */
289 r = 5;
290 } else if ((ft & VT_TYPE) == VT_BYTE) {
291 o(0xbe0f); /* movsbl */
292 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
293 o(0xb60f); /* movzbl */
294 } else if ((ft & VT_TYPE) == VT_SHORT) {
295 o(0xbf0f); /* movswl */
296 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
297 o(0xb70f); /* movzwl */
298 } else if (is64_type(ft)) {
/* 64-bit read: gen_modrm64() emits prefix, opcode and ModRM itself */
299 gen_modrm64(0x8b, r, fr, sv->sym, fc);
300 return;
301 } else {
302 o(0x8b); /* movl */
304 gen_modrm(r, fr, sv->sym, fc);
305 } else {
306 if (v == VT_CONST) {
307 if ((ft & VT_BTYPE) == VT_LLONG) {
/* 64-bit immediate (with an R_X86_64_64 relocation when VT_SYM is set) */
308 o(0x48);
309 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
310 gen_addr64(fr, sv->sym, sv->c.ull);
311 } else {
312 if (fr & VT_SYM) {
313 o(0x8d48);
314 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
315 gen_addrpc32(fr, sv->sym, fc);
316 } else {
317 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
318 gen_le32(fc);
321 } else if (v == VT_LOCAL) {
/* address of a local: lea fc(%rbp), r */
322 o(0x48 | REX_BASE(r));
323 o(0x8d); /* lea xxx(%ebp), r */
324 gen_modrm(r, VT_LOCAL, sv->sym, fc);
325 } else if (v == VT_CMP) {
/* 'fc' is emitted as the second opcode byte of the setxx instruction.
   NOTE(review): 'r' is added to the opcode bytes here and in the
   VT_JMP case without any REX prefix; this looks like it assumes
   r < 8 — confirm. */
326 oad(0xb8 + r, 0); /* mov $0, r */
327 o(0x0f); /* setxx %br */
328 o(fc);
329 o(0xc0 + r);
330 } else if (v == VT_JMP || v == VT_JMPI) {
/* 't' is the low bit of the value kind: it is the value loaded on the
   fall-through path; the jump chain 'fc' is resolved to the second
   mov, which loads the opposite value */
331 t = v & 1;
332 oad(0xb8 + r, t); /* mov $1, r */
333 o(0x05eb); /* jmp after */
334 gsym(fc);
335 oad(0xb8 + r, t ^ 1); /* mov $0, r */
336 } else if (v != r) {
/* register to register; xmm0 <-> st0 go through memory at -0x10(%rsp) */
337 if (r == TREG_XMM0) {
338 assert(v == TREG_ST0);
339 /* gen_cvt_ftof(VT_DOUBLE); */
340 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
341 /* movsd -0x10(%rsp),%xmm0 */
342 o(0x44100ff2);
343 o(0xf024);
344 } else if (r == TREG_ST0) {
345 assert(v == TREG_XMM0);
346 /* gen_cvt_ftof(VT_LDOUBLE); */
347 /* movsd %xmm0,-0x10(%rsp) */
348 o(0x44110ff2);
349 o(0xf024);
350 o(0xf02444dd); /* fldl -0x10(%rsp) */
351 } else {
352 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
353 o(0x89);
354 o(0xc0 + r + v * 8); /* mov v, r */
360 /* store register 'r' in lvalue 'v' */
/* The store opcode is chosen from the basic type of 'v'. For 64-bit
   types (see is64_type()) the opcode is not emitted here but handed to
   gen_modrm64() through 'op64' so that it can be preceded by a REX
   prefix. */
361 void store(int r, SValue *v)
363 int fr, bt, ft, fc;
364 int op64 = 0;
366 ft = v->type.t;
367 fc = v->c.ul;
368 fr = v->r & VT_VALMASK;
369 bt = ft & VT_BTYPE;
370 /* XXX: incorrect if float reg to reg */
/* for float/double the ModRM reg field is forced to 0; for long
   double it is 7 (opcode extension of the fstpt encoding) */
371 if (bt == VT_FLOAT) {
372 o(0x7e0f66); /* movd */
373 r = 0;
374 } else if (bt == VT_DOUBLE) {
375 o(0xd60f66); /* movq */
376 r = 0;
377 } else if (bt == VT_LDOUBLE) {
/* duplicate st(0) first, since the store instruction pops it */
378 o(0xc0d9); /* fld %st(0) */
379 o(0xdb); /* fstpt */
380 r = 7;
381 } else {
382 if (bt == VT_SHORT)
383 o(0x66);
384 if (bt == VT_BYTE || bt == VT_BOOL)
385 o(0x88);
386 else if (is64_type(bt))
387 op64 = 0x89;
388 else
389 o(0x89);
391 if (op64) {
392 if (fr == VT_CONST ||
393 fr == VT_LOCAL ||
394 (v->r & VT_LVAL)) {
395 gen_modrm64(op64, r, v->r, v->sym, fc);
396 } else if (fr != r) {
397 /* XXX: don't we really come here? */
/* the o() after abort() is not reached */
398 abort();
399 o(0xc0 + fr + r * 8); /* mov r, fr */
401 } else {
402 if (fr == VT_CONST ||
403 fr == VT_LOCAL ||
404 (v->r & VT_LVAL)) {
405 gen_modrm(r, v->r, v->sym, fc);
406 } else if (fr != r) {
407 /* XXX: don't we really come here? */
/* the o() after abort() is not reached */
408 abort();
409 o(0xc0 + fr + r * 8); /* mov r, fr */
/* Emit 'add $val, %rsp', using the 8-bit immediate encoding when 'val'
   fits in a signed char and the 32-bit immediate encoding otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
424 /* 'is_jmp' is '1' if it is a jump */
425 static void gcall_or_jmp(int is_jmp)
427 int r;
428 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
429 /* constant case */
430 if (vtop->r & VT_SYM) {
431 /* relocation case */
432 greloc(cur_text_section, vtop->sym,
433 ind + 1, R_X86_64_PC32);
434 } else {
435 /* put an empty PC32 relocation */
436 put_elf_reloc(symtab_section, cur_text_section,
437 ind + 1, R_X86_64_PC32, 0);
439 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
440 } else {
441 /* otherwise, indirect call */
442 r = TREG_R11;
443 load(r, vtop);
444 o(0x41); /* REX */
445 o(0xff); /* call/jmp *r */
446 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
/* registers used for integer arguments, indexed by the position of the
   argument among the integer arguments (entry 0 = first one) */
450 static uint8_t arg_regs[6] = {
451 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
453 /* Generate function call. The function address is pushed first, then
454 all the parameters in call order. This functions pops all the
455 parameters and the function address. */
456 void gfunc_call(int nb_args)
458 int size, align, r, args_size, i, func_call;
459 Sym *func_sym;
460 SValue *orig_vtop;
461 int nb_reg_args = 0;
462 int nb_sse_args = 0;
463 int sse_reg, gen_reg;
465 /* calculate the number of integer/float arguments */
466 args_size = 0;
467 for(i = 0; i < nb_args; i++) {
468 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
469 args_size += type_size(&vtop->type, &align);
470 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
471 args_size += 16;
472 } else if (is_sse_float(vtop[-i].type.t)) {
473 nb_sse_args++;
474 if (nb_sse_args > 8) args_size += 8;
475 } else {
476 nb_reg_args++;
477 if (nb_reg_args > 6) args_size += 8;
481 /* for struct arguments, we need to call memcpy and the function
482 call breaks register passing arguments we are preparing.
483 So, we process arguments which will be passed by stack first. */
484 orig_vtop = vtop;
485 gen_reg = nb_reg_args;
486 sse_reg = nb_sse_args;
487 /* adjust stack to align SSE boundary */
488 if (args_size &= 8) {
489 o(0x50); /* push $rax */
491 for(i = 0; i < nb_args; i++) {
492 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
493 size = type_size(&vtop->type, &align);
494 /* align to stack align size */
495 size = (size + 3) & ~3;
496 /* allocate the necessary size on stack */
497 o(0x48);
498 oad(0xec81, size); /* sub $xxx, %rsp */
499 /* generate structure store */
500 r = get_reg(RC_INT);
501 o(0x48 + REX_BASE(r));
502 o(0x89); /* mov %rsp, r */
503 o(0xe0 + r);
505 /* following code breaks vtop[1] */
506 SValue tmp = vtop[1];
507 vset(&vtop->type, r | VT_LVAL, 0);
508 vswap();
509 vstore();
510 vtop[1] = tmp;
512 args_size += size;
513 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
514 gv(RC_ST0);
515 size = LDOUBLE_SIZE;
516 oad(0xec8148, size); /* sub $xxx, %rsp */
517 o(0x7cdb); /* fstpt 0(%rsp) */
518 g(0x24);
519 g(0x00);
520 args_size += size;
521 } else if (is_sse_float(vtop->type.t)) {
522 int j = --sse_reg;
523 if (j >= 8) {
524 gv(RC_FLOAT);
525 o(0x50); /* push $rax */
526 /* movq %xmm0, (%rsp) */
527 o(0x04d60f66);
528 o(0x24);
529 args_size += 8;
531 } else {
532 int j = --gen_reg;
533 /* simple type */
534 /* XXX: implicit cast ? */
535 if (j >= 6) {
536 r = gv(RC_INT);
537 o(0x50 + r); /* push r */
538 args_size += 8;
541 vtop--;
543 vtop = orig_vtop;
545 /* then, we prepare register passing arguments.
546 Note that we cannot set RDX and RCX in this loop because gv()
547 may break these temporary registers. Let's use R10 and R11
548 instead of them */
549 gen_reg = nb_reg_args;
550 sse_reg = nb_sse_args;
551 for(i = 0; i < nb_args; i++) {
552 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
553 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
554 } else if (is_sse_float(vtop->type.t)) {
555 int j = --sse_reg;
556 if (j < 8) {
557 gv(RC_FLOAT); /* only one float register */
558 /* movaps %xmm0, %xmmN */
559 o(0x280f);
560 o(0xc0 + (sse_reg << 3));
562 } else {
563 int j = --gen_reg;
564 /* simple type */
565 /* XXX: implicit cast ? */
566 if (j < 6) {
567 r = gv(RC_INT);
568 if (j < 2) {
569 o(0x8948); /* mov */
570 o(0xc0 + r * 8 + arg_regs[j]);
571 } else if (j < 4) {
572 o(0x8949); /* mov */
573 /* j=2: r10, j=3: r11 */
574 o(0xc0 + r * 8 + j);
575 } else {
576 o(0x8949); /* mov */
577 /* j=4: r8, j=5: r9 */
578 o(0xc0 + r * 8 + j - 4);
582 vtop--;
585 /* Copy R10 and R11 into RDX and RCX, respectively */
586 if (nb_reg_args > 2) {
587 o(0xd2894c); /* mov %r10, %rdx */
588 if (nb_reg_args > 3) {
589 o(0xd9894c); /* mov %r11, %rcx */
593 save_regs(0); /* save used temporary registers */
595 func_sym = vtop->type.ref;
596 func_call = FUNC_CALL(func_sym->r);
597 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
598 gcall_or_jmp(0);
599 if (args_size)
600 gadd_sp(args_size);
601 vtop--;
/* FUNC_PROLOG_SIZE: number of bytes gfunc_prolog() reserves at the
   start of a function. gfunc_epilog() later rewinds to that position
   and writes the real prolog there, padding with a nop when the size
   is 12. */
604 #ifdef TCC_TARGET_PE
605 /* XXX: support PE? */
606 #warning "PE isn't tested at all"
607 #define FUNC_PROLOG_SIZE 12
608 #else
609 #define FUNC_PROLOG_SIZE 11
610 #endif
612 static void push_arg_reg(int i) {
613 loc -= 8;
614 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
617 /* generate function prolog of type 't' */
/* Sets up code generation for a function of type 'func_type':
   - reserves FUNC_PROLOG_SIZE bytes for the prolog (the prolog bytes
     themselves are written later by gfunc_epilog());
   - for a FUNC_ELLIPSIS function, stores three 32-bit values at
     -0x10/-0xc/-0x8(%rbp) and spills all 8 SSE and 6 integer argument
     registers to locals;
   - spills each register-passed parameter to a local slot and declares
     every parameter with sym_push() as a VT_LOCAL lvalue.
   Parameters not passed in registers are addressed at positive offsets
   starting at PTR_SIZE * 2. */
618 void gfunc_prolog(CType *func_type)
620 int i, addr, align, size, func_call;
621 int param_index, param_addr, reg_param_index, sse_param_index;
622 Sym *sym;
623 CType *type;
625 func_ret_sub = 0;
627 sym = func_type->ref;
/* func_call is assigned here but not read again in this function */
628 func_call = FUNC_CALL(sym->r);
629 addr = PTR_SIZE * 2;
630 loc = 0;
/* skip the space where gfunc_epilog() will write the prolog */
631 ind += FUNC_PROLOG_SIZE;
632 func_sub_sp_offset = ind;
634 if (func_type->ref->c == FUNC_ELLIPSIS) {
635 int seen_reg_num, seen_sse_num, seen_stack_size;
636 seen_reg_num = seen_sse_num = 0;
637 /* frame pointer and return address */
638 seen_stack_size = PTR_SIZE * 2;
639 /* count the number of seen parameters */
640 sym = func_type->ref;
641 while ((sym = sym->next) != NULL) {
642 type = &sym->type;
643 if (is_sse_float(type->t)) {
644 if (seen_sse_num < 8) {
645 seen_sse_num++;
646 } else {
647 seen_stack_size += 8;
649 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
650 size = type_size(type, &align);
651 size = (size + 3) & ~3;
652 seen_stack_size += size;
653 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
654 seen_stack_size += LDOUBLE_SIZE;
655 } else {
656 if (seen_reg_num < 6) {
657 seen_reg_num++;
658 } else {
659 seen_stack_size += 8;
/* three 32-bit values describing the named parameters already seen.
   NOTE(review): presumably these are the offsets consumed by the
   va_arg implementation (integer area, SSE area, stack area) —
   confirm against the va_list code, which is not in this file. */
664 loc -= 16;
665 /* movl $0x????????, -0x10(%rbp) */
666 o(0xf045c7);
667 gen_le32(seen_reg_num * 8);
668 /* movl $0x????????, -0xc(%rbp) */
669 o(0xf445c7);
670 gen_le32(seen_sse_num * 16 + 48);
671 /* movl $0x????????, -0x8(%rbp) */
672 o(0xf845c7);
673 gen_le32(seen_stack_size);
675 /* save all register passing arguments */
/* each SSE register gets a 16-byte slot: the register is stored in the
   low 8 bytes and the high 8 bytes are zeroed; registers are stored
   from number 7 down to 0 while 'loc' decreases */
676 for (i = 0; i < 8; i++) {
677 loc -= 16;
678 o(0xd60f66); /* movq */
679 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
680 /* movq $0, loc+8(%rbp) */
681 o(0x85c748);
682 gen_le32(loc + 8);
683 gen_le32(0);
/* integer argument registers, stored from arg_regs[5] down to
   arg_regs[0] so that arg_regs[0] ends at the lowest address */
685 for (i = 0; i < 6; i++) {
686 push_arg_reg(5 - i);
690 sym = func_type->ref;
/* param_index is only incremented below; it is not read in this
   function */
691 param_index = 0;
692 reg_param_index = 0;
693 sse_param_index = 0;
695 /* if the function returns a structure, then add an
696 implicit pointer parameter */
697 func_vt = sym->type;
698 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
/* the implicit pointer takes the first integer argument register; its
   local slot is recorded in func_vc */
699 push_arg_reg(reg_param_index);
700 param_addr = loc;
702 func_vc = loc;
703 param_index++;
704 reg_param_index++;
706 /* define parameters */
707 while ((sym = sym->next) != NULL) {
708 type = &sym->type;
709 size = type_size(type, &align);
710 size = (size + 3) & ~3;
711 if (is_sse_float(type->t)) {
712 if (sse_param_index < 8) {
713 /* save arguments passed by register */
714 loc -= 8;
715 o(0xd60f66); /* movq */
716 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
717 param_addr = loc;
718 } else {
719 param_addr = addr;
720 addr += size;
722 sse_param_index++;
723 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
724 (type->t & VT_BTYPE) == VT_LDOUBLE) {
/* structures and long doubles are always addressed through 'addr' */
725 param_addr = addr;
726 addr += size;
727 } else {
728 if (reg_param_index < 6) {
729 /* save arguments passed by register */
730 push_arg_reg(reg_param_index);
731 param_addr = loc;
732 } else {
733 param_addr = addr;
734 addr += 8;
736 reg_param_index++;
738 sym_push(sym->v & ~SYM_FIELD, type,
739 VT_LOCAL | VT_LVAL, param_addr);
740 param_index++;
744 /* generate function epilog */
/* Emits 'leave' followed by 'ret' (or 'ret n' when func_ret_sub is not
   zero), then goes back to the space reserved by gfunc_prolog() and
   writes the prolog there, now that the final size of the locals
   ('loc') is known. 'ind' is restored afterwards so that code
   generation continues after the epilog. */
745 void gfunc_epilog(void)
747 int v, saved_ind;
749 o(0xc9); /* leave */
750 if (func_ret_sub == 0) {
751 o(0xc3); /* ret */
752 } else {
753 o(0xc2); /* ret n */
754 g(func_ret_sub);
755 g(func_ret_sub >> 8);
757 /* align local size to word & save local variables */
/* v = size of the locals rounded up to a multiple of 16 */
758 v = (-loc + 15) & -16;
759 saved_ind = ind;
/* rewind to the start of the reserved prolog area */
760 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
761 #ifdef TCC_TARGET_PE
762 if (v >= 4096) {
763 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
764 oad(0xb8, v); /* mov stacksize, %eax */
765 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
766 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
767 } else
768 #endif
/* 4 + 3 + 4 = 11 bytes; one extra nop is emitted when
   FUNC_PROLOG_SIZE is 12 */
770 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
771 o(0xec8148); /* sub rsp, stacksize */
772 gen_le32(v);
773 #if FUNC_PROLOG_SIZE == 12
774 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
775 #endif
777 ind = saved_ind;
/* Generate a jump (opcode 0xe9) whose 32-bit field is initialised with
   't'. Returns the text offset of that field, as returned by psym(). */
int gjmp(int t)
{
    int field_pos = psym(0xe9, t);

    return field_pos;
}
786 /* generate a jump to a fixed address */
787 void gjmp_addr(int a)
789 int r;
790 r = a - ind - 2;
791 if (r == (char)r) {
792 g(0xeb);
793 g(r);
794 } else {
795 oad(0xe9, a - ind - 5);
799 /* generate a test. set 'inv' to invert test. Stack entry is popped */
800 int gtst(int inv, int t)
802 int v, *p;
804 v = vtop->r & VT_VALMASK;
805 if (v == VT_CMP) {
806 /* fast case : can jump directly since flags are set */
807 g(0x0f);
808 t = psym((vtop->c.i - 16) ^ inv, t);
809 } else if (v == VT_JMP || v == VT_JMPI) {
810 /* && or || optimization */
811 if ((v & 1) == inv) {
812 /* insert vtop->c jump list in t */
813 p = &vtop->c.i;
814 while (*p != 0)
815 p = (int *)(cur_text_section->data + *p);
816 *p = t;
817 t = vtop->c.i;
818 } else {
819 t = gjmp(t);
820 gsym(vtop->c.i);
822 } else {
823 /* XXX: not tested */
824 if (is_float(vtop->type.t) ||
825 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
826 vpushi(0);
827 gen_op(TOK_NE);
829 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
830 /* constant jmp optimization */
831 if ((vtop->c.i != 0) != inv)
832 t = gjmp(t);
833 } else {
834 v = gv(RC_INT);
835 o(0x85);
836 o(0xc0 + v * 9);
837 g(0x0f);
838 t = psym(0x85 ^ inv, t);
841 vtop--;
842 return t;
845 /* generate an integer binary operation */
/* Operands are the two topmost entries of the value stack (vtop[-1]
   op vtop[0]); one entry is popped and the result is left on top.
   The 'gen_op8' path handles the two-operand ALU instructions; 'opc' is
   the operation index placed in bits 3..5 of the opcode / ModRM byte:
   0 '+', 1 '|', 2 add-with-carry, 3 sub-with-carry, 4 '&', 5 '-',
   6 '^', and 7 for every operator not listed in the switch (the
   default case). For operators in the TOK_ULT..TOK_GT range the result
   is recorded as VT_CMP with the operator in c.i. */
846 void gen_opi(int op)
848 int r, fr, opc, c;
850 switch(op) {
851 case '+':
852 case TOK_ADDC1: /* add with carry generation */
853 opc = 0;
854 gen_op8:
855 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
856 !is64_type(vtop->type.t)) {
857 /* constant case */
/* the left operand goes into a register; the constant is encoded as an
   8-bit immediate (0x83) when it fits in a signed char, as a 32-bit
   immediate (0x81) otherwise */
858 vswap();
859 r = gv(RC_INT);
860 if (is64_type(vtop->type.t)) {
861 o(0x48 | REX_BASE(r));
863 vswap();
864 c = vtop->c.i;
865 if (c == (char)c) {
866 /* XXX: generate inc and dec for smaller code ? */
867 o(0x83);
868 o(0xc0 | (opc << 3) | REG_VALUE(r));
869 g(c);
870 } else {
871 o(0x81);
872 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
874 } else {
875 gv2(RC_INT, RC_INT);
876 r = vtop[-1].r;
877 fr = vtop[0].r;
/* the REX prefix is emitted for every operation except opc 7, where it
   is only emitted when an operand is a 64-bit type or unsigned */
878 if (opc != 7 ||
879 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
880 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
881 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
883 o((opc << 3) | 0x01);
884 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
886 vtop--;
887 if (op >= TOK_ULT && op <= TOK_GT) {
888 vtop->r = VT_CMP;
889 vtop->c.i = op;
891 break;
892 case '-':
893 case TOK_SUBC1: /* sub with carry generation */
894 opc = 5;
895 goto gen_op8;
896 case TOK_ADDC2: /* add with carry use */
897 opc = 2;
898 goto gen_op8;
899 case TOK_SUBC2: /* sub with carry use */
900 opc = 3;
901 goto gen_op8;
902 case '&':
903 opc = 4;
904 goto gen_op8;
905 case '^':
906 opc = 6;
907 goto gen_op8;
908 case '|':
909 opc = 1;
910 goto gen_op8;
911 case '*':
912 gv2(RC_INT, RC_INT);
913 r = vtop[-1].r;
914 fr = vtop[0].r;
915 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
916 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
917 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
919 vtop--;
920 o(0xaf0f); /* imul fr, r */
921 o(0xc0 + fr + r * 8);
922 break;
923 case TOK_SHL:
924 opc = 4;
925 goto gen_shift;
926 case TOK_SHR:
927 opc = 5;
928 goto gen_shift;
929 case TOK_SAR:
930 opc = 7;
931 gen_shift:
/* from here on 'opc' is the complete ModRM base byte of the shift */
932 opc = 0xc0 | (opc << 3);
933 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
934 /* constant case */
/* the count is masked to 6 bits for long long, 5 bits otherwise */
935 vswap();
936 r = gv(RC_INT);
937 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
938 o(0x48 | REX_BASE(r));
939 c = 0x3f;
940 } else {
941 c = 0x1f;
943 vswap();
944 c &= vtop->c.i;
945 o(0xc1); /* shl/shr/sar $xxx, r */
946 o(opc | r);
947 g(c);
948 } else {
949 /* we generate the shift in ecx */
950 gv2(RC_INT, RC_RCX);
951 r = vtop[-1].r;
952 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
953 o(0x48 | REX_BASE(r));
955 o(0xd3); /* shl/shr/sar %cl, r */
956 o(opc | r);
958 vtop--;
959 break;
960 case '/':
961 case TOK_UDIV:
962 case TOK_PDIV:
963 case '%':
964 case TOK_UMOD:
965 case TOK_UMULL:
966 /* first operand must be in eax */
967 /* XXX: need better constraint for second operand */
968 gv2(RC_RAX, RC_RCX);
969 r = vtop[-1].r;
970 fr = vtop[0].r;
971 vtop--;
/* rdx is written by the instructions below, so it is saved first */
972 save_reg(TREG_RDX);
973 if (op == TOK_UMULL) {
974 o(0xf7); /* mul fr */
975 o(0xe0 + fr);
/* the result is recorded in the rax (r) / rdx (r2) pair */
976 vtop->r2 = TREG_RDX;
977 r = TREG_RAX;
978 } else {
979 if (op == TOK_UDIV || op == TOK_UMOD) {
980 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
981 o(0xf0 + fr);
982 } else {
/* NOTE(review): this tests the basic type with '& VT_LLONG', whereas
   the shift cases above compare with '== VT_LLONG' — confirm which
   one is intended. */
983 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
984 o(0x9948); /* cqto */
985 o(0x48 + REX_BASE(fr));
986 } else {
987 o(0x99); /* cltd */
989 o(0xf7); /* idiv fr, %eax */
990 o(0xf8 + fr);
/* the remainder is taken from rdx, the quotient from rax */
992 if (op == '%' || op == TOK_UMOD)
993 r = TREG_RDX;
994 else
995 r = TREG_RAX;
997 vtop->r = r;
998 break;
999 default:
1000 opc = 7;
1001 goto gen_op8;
/* Generate a binary operation on long long operands. On this target it
   simply forwards to gen_opi(), which handles the 64-bit cases. */
void gen_opl(int op)
{
    gen_opi(op);
}
1010 /* generate a floating point operation 'v = t1 op t2' instruction. The
1011 two operands are guaranteed to have the same floating point type */
1012 /* XXX: need to use ST1 too */
/* Operands are the two topmost entries of the value stack. One entry
   is popped; for comparisons (TOK_ULT..TOK_GT) the result is recorded
   as VT_CMP with the condition in c.i, otherwise the result stays in
   the entry left on top.
   long double operands use the x87 stack (RC_ST0), float and double
   use the SSE register class (RC_FLOAT). 'swapped' records that the
   two operands have been exchanged relative to source order. */
1013 void gen_opf(int op)
1015 int a, ft, fc, swapped, r;
1016 int float_type =
1017 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1019 /* convert constants to memory references */
1020 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1021 vswap();
1022 gv(float_type);
1023 vswap();
1025 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1026 gv(float_type);
1028 /* must put at least one value in the floating point register */
1029 if ((vtop[-1].r & VT_LVAL) &&
1030 (vtop[0].r & VT_LVAL)) {
1031 vswap();
1032 gv(float_type);
1033 vswap();
1035 swapped = 0;
1036 /* swap the stack if needed so that t1 is the register and t2 is
1037 the memory reference */
1038 if (vtop[-1].r & VT_LVAL) {
1039 vswap();
1040 swapped = 1;
1042 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
/* ---- long double: x87 ---- */
1043 if (op >= TOK_ULT && op <= TOK_GT) {
1044 /* load on stack second operand */
1045 load(TREG_ST0, vtop);
1046 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1047 if (op == TOK_GE || op == TOK_GT)
1048 swapped = !swapped;
1049 else if (op == TOK_EQ || op == TOK_NE)
1050 swapped = 0;
1051 if (swapped)
1052 o(0xc9d9); /* fxch %st(1) */
1053 o(0xe9da); /* fucompp */
1054 o(0xe0df); /* fnstsw %ax */
/* the status bits in %ah are reduced to a test whose outcome is then
   read back through the TOK_EQ / TOK_NE condition stored in 'op' */
1055 if (op == TOK_EQ) {
1056 o(0x45e480); /* and $0x45, %ah */
1057 o(0x40fC80); /* cmp $0x40, %ah */
1058 } else if (op == TOK_NE) {
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40f480); /* xor $0x40, %ah */
1061 op = TOK_NE;
1062 } else if (op == TOK_GE || op == TOK_LE) {
1063 o(0x05c4f6); /* test $0x05, %ah */
1064 op = TOK_EQ;
1065 } else {
1066 o(0x45c4f6); /* test $0x45, %ah */
1067 op = TOK_EQ;
1069 vtop--;
1070 vtop->r = VT_CMP;
1071 vtop->c.i = op;
1072 } else {
1073 /* no memory reference possible for long double operations */
1074 load(TREG_ST0, vtop);
1075 swapped = !swapped;
/* 'a' selects the x87 operation; for '-' and '/' the next value (a + 1)
   is used when the operands are swapped */
1077 switch(op) {
1078 default:
1079 case '+':
1080 a = 0;
1081 break;
1082 case '-':
1083 a = 4;
1084 if (swapped)
1085 a++;
1086 break;
1087 case '*':
1088 a = 1;
1089 break;
1090 case '/':
1091 a = 6;
1092 if (swapped)
1093 a++;
1094 break;
/* ft and fc are assigned here but not read again in this branch */
1096 ft = vtop->type.t;
1097 fc = vtop->c.ul;
1098 o(0xde); /* fxxxp %st, %st(1) */
1099 o(0xc1 + (a << 3));
1100 vtop--;
1102 } else {
/* ---- float / double: SSE ---- */
1103 if (op >= TOK_ULT && op <= TOK_GT) {
1104 /* if saved lvalue, then we must reload it */
1105 r = vtop->r;
1106 fc = vtop->c.ul;
1107 if ((r & VT_VALMASK) == VT_LLOCAL) {
1108 SValue v1;
1109 r = get_reg(RC_INT);
1110 v1.type.t = VT_INT;
1111 v1.r = VT_LOCAL | VT_LVAL;
1112 v1.c.ul = fc;
1113 load(r, &v1);
1114 fc = 0;
/* from here on 'op' holds the second opcode byte of the setcc
   instruction (0x93 or 0x97) unless the operator is TOK_EQ / TOK_NE,
   in which case it is left unchanged */
1117 if (op == TOK_EQ || op == TOK_NE) {
1118 swapped = 0;
1119 } else {
1120 if (op == TOK_LE || op == TOK_LT)
1121 swapped = !swapped;
1122 if (op == TOK_LE || op == TOK_GE) {
1123 op = 0x93; /* setae */
1124 } else {
1125 op = 0x97; /* seta */
1129 if (swapped) {
/* the memory operand is first loaded with reg field 1 and then
   compared register to register */
1130 o(0x7e0ff3); /* movq */
1131 gen_modrm(1, r, vtop->sym, fc);
1133 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1134 o(0x66);
1136 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1137 o(0xc8);
1138 } else {
1139 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1140 o(0x66);
1142 o(0x2e0f); /* ucomisd */
1143 gen_modrm(0, r, vtop->sym, fc);
1146 vtop--;
1147 vtop->r = VT_CMP;
1148 vtop->c.i = op;
1149 } else {
1150 /* no memory reference possible for long double operations */
/* this test cannot succeed here: the enclosing else is only entered
   when the type is not VT_LDOUBLE */
1151 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1152 load(TREG_XMM0, vtop);
1153 swapped = !swapped;
/* 'a' is added to 0x58 below to form the last opcode byte:
   0 add, 1 mul, 4 sub, 6 div */
1155 switch(op) {
1156 default:
1157 case '+':
1158 a = 0;
1159 break;
1160 case '-':
1161 a = 4;
1162 break;
1163 case '*':
1164 a = 1;
1165 break;
1166 case '/':
1167 a = 6;
1168 break;
1170 ft = vtop->type.t;
1171 fc = vtop->c.ul;
1172 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1173 o(0xde); /* fxxxp %st, %st(1) */
1174 o(0xc1 + (a << 3));
1175 } else {
1176 /* if saved lvalue, then we must reload it */
1177 r = vtop->r;
1178 if ((r & VT_VALMASK) == VT_LLOCAL) {
1179 SValue v1;
1180 r = get_reg(RC_INT);
1181 v1.type.t = VT_INT;
1182 v1.r = VT_LOCAL | VT_LVAL;
1183 v1.c.ul = fc;
1184 load(r, &v1);
1185 fc = 0;
1187 if (swapped) {
/* the register operand is moved aside to xmm1, the other operand is
   loaded into xmm0, and the operation is done register to register;
   the prefix is 0xf2 for double, 0xf3 for float */
1188 /* movq %xmm0,%xmm1 */
1189 o(0x7e0ff3);
1190 o(0xc8);
1191 load(TREG_XMM0, vtop);
1192 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1193 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1194 o(0xf2);
1195 } else {
1196 o(0xf3);
1198 o(0x0f);
1199 o(0x58 + a);
1200 o(0xc1);
1201 } else {
1202 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1203 o(0xf2);
1204 } else {
1205 o(0xf3);
1207 o(0x0f);
1208 o(0x58 + a);
1209 gen_modrm(0, r, vtop->sym, fc);
1212 vtop--;
1217 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1218 and 'long long' cases. */
/* The integer on top of the value stack is converted in place: after
   the call vtop->r is TREG_ST0 when 't' is long double and TREG_XMM0
   otherwise.
   For long double the integer is pushed on the machine stack, loaded
   with an x87 integer load and the stack is released again. */
1219 void gen_cvt_itof(int t)
1221 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1222 save_reg(TREG_ST0);
1223 gv(RC_INT);
1224 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1225 /* signed long long to float/double/long double (unsigned case
1226 is handled generically) */
1227 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1228 o(0x242cdf); /* fildll (%rsp) */
1229 o(0x08c48348); /* add $8, %rsp */
1230 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1231 (VT_INT | VT_UNSIGNED)) {
1232 /* unsigned int to float/double/long double */
/* loaded with the 64-bit integer load so that the value is treated as
   non-negative; two pushes, hence 16 bytes released afterwards */
1233 o(0x6a); /* push $0 */
1234 g(0x00);
1235 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1236 o(0x242cdf); /* fildll (%rsp) */
1237 o(0x10c48348); /* add $16, %rsp */
1238 } else {
1239 /* int to float/double/long double */
1240 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1241 o(0x2404db); /* fildl (%rsp) */
1242 o(0x08c48348); /* add $8, %rsp */
1244 vtop->r = TREG_ST0;
1245 } else {
1246 save_reg(TREG_XMM0);
1247 gv(RC_INT);
/* prefix byte: 0xf2 when 't' is not float, 0xf3 when it is */
1248 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
/* a 64-bit source operand (REX prefix 0x48) is used for unsigned int
   and long long */
1249 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1250 (VT_INT | VT_UNSIGNED) ||
1251 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1252 o(0x48); /* REX */
1254 o(0x2a0f);
1255 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1256 vtop->r = TREG_XMM0;
1260 /* convert from one floating point type to another */
/* The value on top of the value stack is converted to the basic type
   of 't'. Conversions between float and double are done in the SSE
   register; conversions to or from long double go through memory at
   -0x10(%rsp) and change vtop->r to TREG_ST0 or TREG_XMM0.
   When source and target basic types are the same, the value is only
   brought into its register class and nothing else is emitted. */
1261 void gen_cvt_ftof(int t)
1263 int ft, bt, tbt;
1265 ft = vtop->type.t;
1266 bt = ft & VT_BTYPE;
1267 tbt = t & VT_BTYPE;
1269 if (bt == VT_FLOAT) {
1270 gv(RC_FLOAT);
1271 if (tbt == VT_DOUBLE) {
1272 o(0xc0140f); /* unpcklps */
1273 o(0xc05a0f); /* cvtps2pd */
1274 } else if (tbt == VT_LDOUBLE) {
1275 /* movss %xmm0,-0x10(%rsp) */
1276 o(0x44110ff3);
1277 o(0xf024);
1278 o(0xf02444d9); /* flds -0x10(%rsp) */
1279 vtop->r = TREG_ST0;
1281 } else if (bt == VT_DOUBLE) {
1282 gv(RC_FLOAT);
1283 if (tbt == VT_FLOAT) {
1284 o(0xc0140f66); /* unpcklpd */
1285 o(0xc05a0f66); /* cvtpd2ps */
1286 } else if (tbt == VT_LDOUBLE) {
1287 /* movsd %xmm0,-0x10(%rsp) */
1288 o(0x44110ff2);
1289 o(0xf024);
1290 o(0xf02444dd); /* fldl -0x10(%rsp) */
1291 vtop->r = TREG_ST0;
1293 } else {
/* source is neither float nor double: it is loaded as RC_ST0 */
1294 gv(RC_ST0);
1295 if (tbt == VT_DOUBLE) {
1296 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1297 /* movsd -0x10(%rsp),%xmm0 */
1298 o(0x44100ff2);
1299 o(0xf024);
1300 vtop->r = TREG_XMM0;
1301 } else if (tbt == VT_FLOAT) {
1302 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1303 /* movss -0x10(%rsp),%xmm0 */
1304 o(0x44100ff3);
1305 o(0xf024);
1306 vtop->r = TREG_XMM0;
1311 /* convert fp to int 't' type */
1312 void gen_cvt_ftoi(int t)
1314 int ft, bt, size, r;
1315 ft = vtop->type.t;
1316 bt = ft & VT_BTYPE;
1317 if (bt == VT_LDOUBLE) {
1318 gen_cvt_ftof(VT_DOUBLE);
1319 bt = VT_DOUBLE;
1322 gv(RC_FLOAT);
1323 if (t != VT_INT)
1324 size = 8;
1325 else
1326 size = 4;
1328 r = get_reg(RC_INT);
1329 if (bt == VT_FLOAT) {
1330 o(0xf3);
1331 } else if (bt == VT_DOUBLE) {
1332 o(0xf2);
1333 } else {
1334 assert(0);
1336 if (size == 8) {
1337 o(0x48 + REX_BASE(r));
1339 o(0x2c0f); /* cvttss2si or cvttsd2si */
1340 o(0xc0 + (REG_VALUE(r) << 3));
1341 vtop->r = r;
1344 /* computed goto support */
1345 void ggoto(void)
1347 gcall_or_jmp(1);
1348 vtop--;
1351 /* end of x86-64 code generator */
1352 /*************************************************************/