Add support of x86-64.
[tinycc.git] / x86_64-gen.c
blob2435d4d8dd43f1fa0f242a14c53e7d73b606d89a
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <assert.h>
25 /* number of available registers */
26 #define NB_REGS 5
28 /* a register can belong to several classes. The classes must be
29 sorted from more general to more precise (see the gv2() code, which
30 makes assumptions about this ordering). */
31 #define RC_INT 0x0001 /* generic integer register */
32 #define RC_FLOAT 0x0002 /* generic float register */
33 #define RC_RAX 0x0004
34 #define RC_RCX 0x0008
35 #define RC_RDX 0x0010
36 #define RC_XMM0 0x0020
37 #define RC_ST0 0x0040 /* only for long double */
38 #define RC_IRET RC_RAX /* function return: integer register */
39 #define RC_LRET RC_RDX /* function return: second integer register */
40 #define RC_FRET RC_XMM0 /* function return: float register */
42 /* pretty names for the registers */
/* Values 0..4 (RAX, RCX, RDX, XMM0, ST0) are the NB_REGS entries that index
   reg_classes[].  RSI..R11 are numbered 6..11 and therefore lie outside that
   table; REX_BASE()/REG_VALUE() below split such a number into bit 3 and the
   low three bits. */
43 enum {
44 TREG_RAX = 0,
45 TREG_RCX = 1,
46 TREG_RDX = 2,
47 TREG_RSI = 6,
48 TREG_RDI = 7,
49 TREG_R8 = 8,
50 TREG_R9 = 9,
51 TREG_R10 = 10,
52 TREG_R11 = 11,
54 TREG_XMM0 = 3,
55 TREG_ST0 = 4,
58 #define REX_BASE(reg) ((reg) >> 3)
59 #define REG_VALUE(reg) ((reg) & 7)
/* Register class mask for each of the NB_REGS registers, in TREG_* order
   (index 0 = TREG_RAX ... index 4 = TREG_ST0).  Note that st0 carries only
   RC_ST0 and not the generic RC_FLOAT class. */
61 int reg_classes[NB_REGS] = {
62 /* eax */ RC_INT | RC_RAX,
63 /* ecx */ RC_INT | RC_RCX,
64 /* edx */ RC_INT | RC_RDX,
65 /* xmm0 */ RC_FLOAT | RC_XMM0,
66 /* st0 */ RC_ST0,
69 /* return registers for function */
70 #define REG_IRET TREG_RAX /* single word int return register */
71 #define REG_LRET TREG_RDX /* second word return register (for long long) */
72 #define REG_FRET TREG_XMM0 /* float return register */
74 /* defined if function parameters must be evaluated in reverse order */
75 #define INVERT_FUNC_PARAMS
77 /* pointer size, in bytes */
78 #define PTR_SIZE 8
80 /* long double size and alignment, in bytes */
81 #define LDOUBLE_SIZE 16
82 #define LDOUBLE_ALIGN 8
83 /* maximum alignment (for aligned attribute support) */
84 #define MAX_ALIGN 8
86 /******************************************************/
87 /* ELF defines */
89 #define EM_TCC_TARGET EM_X86_64
91 /* relocation type for 32 bit data relocation */
92 #define R_DATA_32 R_X86_64_32
93 #define R_JMP_SLOT R_X86_64_JUMP_SLOT
94 #define R_COPY R_X86_64_COPY
96 #define ELF_START_ADDR 0x08048000
97 #define ELF_PAGE_SIZE 0x1000
99 /******************************************************/
101 static unsigned long func_sub_sp_offset;
102 static int func_ret_sub;
104 /* XXX: make it faster ? */
105 void g(int c)
107 int ind1;
108 ind1 = ind + 1;
109 if (ind1 > cur_text_section->data_allocated)
110 section_realloc(cur_text_section, ind1);
111 cur_text_section->data[ind] = c;
112 ind = ind1;
/* Emit the bytes packed in 'c', least significant first, stopping as soon
   as the remaining value is zero (so o(0) emits nothing and trailing zero
   bytes cannot be produced through this function). */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8) {
        g(c);
    }
}
/* Emit 'c' as four bytes, least significant byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        g(c >> shift);
    }
}
/* Emit 'c' as eight bytes, least significant byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        g(c >> shift);
    }
}
143 /* output a symbol and patch all calls to it */
144 void gsym_addr(int t, int a)
146 int n, *ptr;
147 while (t) {
148 ptr = (int *)(cur_text_section->data + t);
149 n = *ptr; /* next value */
150 *ptr = a - t - 4;
151 t = n;
155 void gsym(int t)
157 gsym_addr(t, ind);
160 /* psym is used to put an instruction with a data field which is a
161 reference to a symbol. It is in fact the same as oad ! */
162 #define psym oad
164 static int is64_type(int t)
166 return ((t & VT_BTYPE) == VT_PTR ||
167 (t & VT_BTYPE) == VT_FUNC ||
168 (t & VT_BTYPE) == VT_LLONG);
171 static int is_sse_float(int t) {
172 int bt;
173 bt = t & VT_BTYPE;
174 return bt == VT_DOUBLE || bt == VT_FLOAT;
177 /* instruction + 4 bytes data. Return the address of the data */
178 static int oad(int c, int s)
180 int ind1;
182 o(c);
183 ind1 = ind + 4;
184 if (ind1 > cur_text_section->data_allocated)
185 section_realloc(cur_text_section, ind1);
186 *(int *)(cur_text_section->data + ind) = s;
187 s = ind;
188 ind = ind1;
189 return s;
192 /* output constant with relocation if 'r & VT_SYM' is true */
193 static void gen_addr64(int r, Sym *sym, int64_t c)
195 if (r & VT_SYM)
196 greloc(cur_text_section, sym, ind, R_X86_64_64);
197 gen_le64(c);
200 /* output constant with relocation if 'r & VT_SYM' is true */
201 static void gen_addr32(int r, Sym *sym, int c)
203 if (r & VT_SYM)
204 greloc(cur_text_section, sym, ind, R_X86_64_32);
205 gen_le32(c);
208 /* output constant with relocation if 'r & VT_SYM' is true */
209 static void gen_addrpc32(int r, Sym *sym, int c)
211 if (r & VT_SYM)
212 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
213 gen_le32(c-4);
216 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
217 opcode bits */
218 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
220 op_reg = op_reg << 3;
221 if ((r & VT_VALMASK) == VT_CONST) {
222 /* constant memory reference */
223 o(0x05 | op_reg);
224 gen_addrpc32(r, sym, c);
225 } else if ((r & VT_VALMASK) == VT_LOCAL) {
226 /* currently, we use only ebp as base */
227 if (c == (char)c) {
228 /* short reference */
229 o(0x45 | op_reg);
230 g(c);
231 } else {
232 oad(0x85 | op_reg, c);
234 } else {
235 g(0x00 | op_reg | (r & VT_VALMASK));
239 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
240 opcode bits */
241 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
243 int rex = 0x48 | (REX_BASE(op_reg) << 2);
244 if ((r & VT_VALMASK) != VT_CONST &&
245 (r & VT_VALMASK) != VT_LOCAL) {
246 rex |= REX_BASE(VT_VALMASK & r);
248 o(rex);
249 o(opcode);
250 op_reg = REG_VALUE(op_reg) << 3;
251 if ((r & VT_VALMASK) == VT_CONST) {
252 /* constant memory reference */
253 o(0x05 | op_reg);
254 gen_addrpc32(r, sym, c);
255 } else if ((r & VT_VALMASK) == VT_LOCAL) {
256 /* currently, we use only ebp as base */
257 if (c == (char)c) {
258 /* short reference */
259 o(0x45 | op_reg);
260 g(c);
261 } else {
262 oad(0x85 | op_reg, c);
264 } else {
265 g(0x00 | op_reg | (r & VT_VALMASK));
270 /* load 'r' from value 'sv' */
/* Emits code that brings the value described by 'sv' into register 'r'.
   - lvalues are read from memory with a move selected by the value type;
   - non-lvalues cover constants, the address of a local (lea), comparison
     results (VT_CMP), pending jump chains (VT_JMP/VT_JMPI) and plain
     register-to-register moves, including xmm0 <-> st0 transfers that go
     through scratch memory at -0x10(%rsp). */
271 void load(int r, SValue *sv)
273 int v, t, ft, fc, fr;
274 SValue v1;
276 fr = sv->r;
277 ft = sv->type.t;
278 fc = sv->c.ul;
280 v = fr & VT_VALMASK;
281 if (fr & VT_LVAL) {
282 if (v == VT_LLOCAL) {
/* the pointer lives in a local slot: load it into 'r' first and then
   use 'r' itself as the base register of the real access */
283 v1.type.t = VT_PTR;
284 v1.r = VT_LOCAL | VT_LVAL;
285 v1.c.ul = fc;
286 load(r, &v1);
287 fr = r;
289 if ((ft & VT_BTYPE) == VT_FLOAT) {
290 o(0x6e0f66); /* movd */
/* from here on 'r' is only the modrm reg field passed to gen_modrm() */
291 r = 0;
292 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
293 o(0x7e0ff3); /* movq */
294 r = 0;
295 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
296 o(0xdb); /* fldt */
/* reg field 5 acts as the opcode extension for the 0xdb opcode */
297 r = 5;
298 } else if ((ft & VT_TYPE) == VT_BYTE) {
299 o(0xbe0f); /* movsbl */
300 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
301 o(0xb60f); /* movzbl */
302 } else if ((ft & VT_TYPE) == VT_SHORT) {
303 o(0xbf0f); /* movswl */
304 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
305 o(0xb70f); /* movzwl */
306 } else if (is64_type(ft)) {
/* 64-bit loads emit their own REX prefix, opcode and modrm */
307 gen_modrm64(0x8b, r, fr, sv->sym, fc);
308 return;
309 } else {
310 o(0x8b); /* movl */
312 gen_modrm(r, fr, sv->sym, fc);
313 } else {
314 if (v == VT_CONST) {
315 if ((ft & VT_TYPE) == VT_LLONG) {
/* full 64-bit immediate */
316 o(0x48);
317 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
318 gen_addr64(fr, sv->sym, sv->c.ull);
319 } else {
/* 32-bit immediate with REX.W prefix (0x48 0xc7 /0) */
320 o(0xc748);
321 o(0xc0 + REG_VALUE(r)); /* mov $xx, r */
322 gen_addr32(fr, sv->sym, fc);
324 } else if (v == VT_LOCAL) {
325 o(0x48 | REX_BASE(r));
326 o(0x8d); /* lea xxx(%ebp), r */
327 gen_modrm(r, VT_LOCAL, sv->sym, fc);
328 } else if (v == VT_CMP) {
/* materialise the flags: zero 'r', then setcc using the condition
   code kept in 'fc' */
329 oad(0xb8 + r, 0); /* mov $0, r */
330 o(0x0f); /* setxx %br */
331 o(fc);
332 o(0xc0 + r);
333 } else if (v == VT_JMP || v == VT_JMPI) {
/* 't' is the low bit of v; the fall-through path yields 't', the
   jump chain 'fc' is resolved to the code that yields 't ^ 1' */
334 t = v & 1;
335 oad(0xb8 + r, t); /* mov $1, r */
336 o(0x05eb); /* jmp after */
337 gsym(fc);
338 oad(0xb8 + r, t ^ 1); /* mov $0, r */
339 } else if (v != r) {
340 if (r == TREG_XMM0) {
341 assert(v == TREG_ST0);
342 /* gen_cvt_ftof(VT_DOUBLE); */
343 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
344 /* movsd -0x10(%rsp),%xmm0 */
345 o(0x44100ff2);
346 o(0xf024);
347 } else if (r == TREG_ST0) {
348 assert(v == TREG_XMM0);
349 /* gen_cvt_ftof(VT_LDOUBLE); */
350 /* movsd %xmm0,-0x10(%rsp) */
351 o(0x44110ff2);
352 o(0xf024);
353 o(0xf02444dd); /* fldl -0x10(%rsp) */
354 } else {
/* NOTE(review): the modrm byte below uses r and v unmasked while the
   REX prefix already carries their bit 3 - confirm this is only reached
   with registers numbered below 8 */
355 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
356 o(0x89);
357 o(0xc0 + r + v * 8); /* mov v, r */
363 /* store register 'r' in lvalue 'v' */
/* The store opcode is selected from the basic type of 'v'.  For
   pointer/function/long long types the opcode is kept in 'op64' and
   emitted later by gen_modrm64() together with a REX.W prefix; for all
   other types the opcode bytes are emitted immediately and gen_modrm()
   adds the memory operand. */
364 void store(int r, SValue *v)
366 int fr, bt, ft, fc;
367 int op64 = 0;
369 ft = v->type.t;
370 fc = v->c.ul;
371 fr = v->r & VT_VALMASK;
372 bt = ft & VT_BTYPE;
373 /* XXX: incorrect if float reg to reg */
374 if (bt == VT_FLOAT) {
375 o(0x7e0f66); /* movd */
/* 'r' now only supplies the modrm reg field */
376 r = 0;
377 } else if (bt == VT_DOUBLE) {
378 o(0xd60f66); /* movq */
379 r = 0;
380 } else if (bt == VT_LDOUBLE) {
/* duplicate st0 first because fstpt pops the value it stores */
381 o(0xc0d9); /* fld %st(0) */
382 o(0xdb); /* fstpt */
/* reg field 7 acts as the opcode extension for the 0xdb opcode */
383 r = 7;
384 } else {
385 if (bt == VT_SHORT)
386 o(0x66);
387 if (bt == VT_BYTE || bt == VT_BOOL)
388 o(0x88);
389 else if (is64_type(bt))
390 op64 = 0x89;
391 else
392 o(0x89);
394 if (op64) {
395 if (fr == VT_CONST ||
396 fr == VT_LOCAL ||
397 (v->r & VT_LVAL)) {
398 gen_modrm64(op64, r, v->r, v->sym, fc);
399 } else if (fr != r) {
400 /* XXX: don't we really come here? */
/* register-to-register store is treated as impossible: abort() is
   called before the move below, which is therefore never emitted */
401 abort();
402 o(0xc0 + fr + r * 8); /* mov r, fr */
404 } else {
405 if (fr == VT_CONST ||
406 fr == VT_LOCAL ||
407 (v->r & VT_LVAL)) {
408 gen_modrm(r, v->r, v->sym, fc);
409 } else if (fr != r) {
410 /* XXX: don't we really come here? */
/* same as above: the code after abort() is not reached */
411 abort();
412 o(0xc0 + fr + r * 8); /* mov r, fr */
/* Emit "add $val, %rsp", using the 8-bit immediate form when 'val'
   survives a round trip through 'char' and the 32-bit form otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
427 /* 'is_jmp' is '1' if it is a jump */
428 static void gcall_or_jmp(int is_jmp)
430 int r;
431 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
432 /* constant case */
433 if (vtop->r & VT_SYM) {
434 /* relocation case */
435 greloc(cur_text_section, vtop->sym,
436 ind + 1, R_X86_64_PC32);
437 } else {
438 /* put an empty PC32 relocation */
439 put_elf_reloc(symtab_section, cur_text_section,
440 ind + 1, R_X86_64_PC32, 0);
442 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
443 } else {
444 /* otherwise, indirect call */
445 r = TREG_R11;
446 load(r, vtop);
447 o(0x41); /* REX */
448 o(0xff); /* call/jmp *r */
449 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
/* Integer registers used for the first six integer arguments, in argument
   order; indexed by the zero-based integer argument position. */
453 static uint8_t arg_regs[6] = {
454 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
456 /* Generate function call. The function address is pushed first, then
457 all the parameters in call order. This functions pops all the
458 parameters and the function address. */
459 void gfunc_call(int nb_args)
461 int size, align, r, args_size, i, func_call;
462 Sym *func_sym;
463 SValue *orig_vtop;
464 int nb_reg_args = 0;
465 int nb_sse_args = 0;
466 int sse_reg, gen_reg;
468 /* calculate the number of integer/float arguments */
469 args_size = 0;
470 for(i = 0; i < nb_args; i++) {
471 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
472 args_size += type_size(&vtop->type, &align);
473 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
474 args_size += 16;
475 } else if (is_sse_float(vtop[-i].type.t)) {
476 nb_sse_args++;
477 if (nb_sse_args > 8) args_size += 8;
478 } else {
479 nb_reg_args++;
480 if (nb_reg_args > 6) args_size += 8;
484 /* for struct arguments, we need to call memcpy and the function
485 call breaks register passing arguments we are preparing.
486 So, we process arguments which will be passed by stack first. */
487 orig_vtop = vtop;
488 gen_reg = nb_reg_args;
489 sse_reg = nb_sse_args;
490 /* adjust stack to align SSE boundary */
491 if (args_size &= 8) {
492 o(0x50); /* push $rax */
494 for(i = 0; i < nb_args; i++) {
495 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
496 size = type_size(&vtop->type, &align);
497 /* align to stack align size */
498 size = (size + 3) & ~3;
499 /* allocate the necessary size on stack */
500 o(0x48);
501 oad(0xec81, size); /* sub $xxx, %rsp */
502 /* generate structure store */
503 r = get_reg(RC_INT);
504 o(0x48 + REX_BASE(r));
505 o(0x89); /* mov %rsp, r */
506 o(0xe0 + r);
508 /* following code breaks vtop[1] */
509 SValue tmp = vtop[1];
510 vset(&vtop->type, r | VT_LVAL, 0);
511 vswap();
512 vstore();
513 vtop[1] = tmp;
515 args_size += size;
516 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
517 gv(RC_ST0);
518 size = LDOUBLE_SIZE;
519 oad(0xec8148, size); /* sub $xxx, %rsp */
520 o(0x7cdb); /* fstpt 0(%rsp) */
521 g(0x24);
522 g(0x00);
523 args_size += size;
524 } else if (is_sse_float(vtop->type.t)) {
525 int j = --sse_reg;
526 if (j >= 8) {
527 gv(RC_FLOAT);
528 o(0x50); /* push $rax */
529 /* movq %xmm0, (%rsp) */
530 o(0x04d60f66);
531 o(0x24);
532 args_size += 8;
534 } else {
535 int j = --gen_reg;
536 /* simple type */
537 /* XXX: implicit cast ? */
538 if (j >= 6) {
539 r = gv(RC_INT);
540 o(0x50 + r); /* push r */
541 args_size += 8;
544 vtop--;
546 vtop = orig_vtop;
548 /* then, we prepare register passing arguments.
549 Note that we cannot set RDX and RCX in this loop because gv()
550 may break these temporary registers. Let's use R10 and R11
551 instead of them */
552 gen_reg = nb_reg_args;
553 sse_reg = nb_sse_args;
554 for(i = 0; i < nb_args; i++) {
555 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
556 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
557 } else if (is_sse_float(vtop->type.t)) {
558 int j = --sse_reg;
559 if (j < 8) {
560 gv(RC_FLOAT); /* only one float register */
561 /* movaps %xmm0, %xmmN */
562 o(0x280f);
563 o(0xc0 + (sse_reg << 3));
565 } else {
566 int j = --gen_reg;
567 /* simple type */
568 /* XXX: implicit cast ? */
569 if (j < 6) {
570 r = gv(RC_INT);
571 if (j < 2) {
572 o(0x8948); /* mov */
573 o(0xc0 + r * 8 + arg_regs[j]);
574 } else if (j < 4) {
575 o(0x8949); /* mov */
576 /* j=2: r10, j=3: r11 */
577 o(0xc0 + r * 8 + j);
578 } else {
579 o(0x8949); /* mov */
580 /* j=4: r8, j=5: r9 */
581 o(0xc0 + r * 8 + j - 4);
585 vtop--;
588 /* Copy R10 and R11 into RDX and RCX, respectively */
589 if (nb_reg_args > 2) {
590 o(0xd2894c); /* mov %r10, %rdx */
591 if (nb_reg_args > 3) {
592 o(0xd9894c); /* mov %r11, %rcx */
596 save_regs(0); /* save used temporary registers */
598 func_sym = vtop->type.ref;
599 func_call = FUNC_CALL(func_sym->r);
600 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
601 gcall_or_jmp(0);
602 if (args_size)
603 gadd_sp(args_size);
604 vtop--;
607 #ifdef TCC_TARGET_PE
608 /* XXX: support PE? */
609 #warning "PE isn't tested at all"
610 #define FUNC_PROLOG_SIZE 12
611 #else
612 #define FUNC_PROLOG_SIZE 11
613 #endif
615 static void push_arg_reg(int i) {
616 loc -= 8;
617 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
620 /* generate function prolog of type 't' */
/* Reserves FUNC_PROLOG_SIZE bytes at the current position (their start is
   recovered later from func_sub_sp_offset), then gives every parameter of
   'func_type' a frame location and pushes it as a local symbol.
   Register-passed parameters are spilled to fresh local slots (negative
   offsets via 'loc'); stack-passed parameters use positive offsets that
   start at PTR_SIZE * 2 (tracked in 'addr'). */
621 void gfunc_prolog(CType *func_type)
623 int i, addr, align, size, func_call;
624 int param_index, param_addr, reg_param_index, sse_param_index;
625 Sym *sym;
626 CType *type;
628 func_ret_sub = 0;
630 sym = func_type->ref;
631 func_call = FUNC_CALL(sym->r);
632 addr = PTR_SIZE * 2;
633 loc = 0;
/* leave room for the frame setup code; it is written afterwards */
634 ind += FUNC_PROLOG_SIZE;
635 func_sub_sp_offset = ind;
637 if (func_type->ref->c == FUNC_ELLIPSIS) {
/* variadic function: count how many registers and how much stack the
   named parameters consume, store three 32-bit values derived from
   those counts in the top 16 bytes of the frame, and dump all eight
   SSE and six integer argument registers below them.
   NOTE(review): presumably this is the state consumed by the va_arg
   implementation - confirm against the runtime's va_list layout */
638 int seen_reg_num, seen_sse_num, seen_stack_size;
639 seen_reg_num = seen_sse_num = 0;
640 /* frame pointer and return address */
641 seen_stack_size = PTR_SIZE * 2;
642 /* count the number of seen parameters */
643 sym = func_type->ref;
644 while ((sym = sym->next) != NULL) {
645 type = &sym->type;
646 if (is_sse_float(type->t)) {
647 if (seen_sse_num < 8) {
648 seen_sse_num++;
649 } else {
650 seen_stack_size += 8;
652 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
653 size = type_size(type, &align);
654 size = (size + 3) & ~3;
655 seen_stack_size += size;
656 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
657 seen_stack_size += LDOUBLE_SIZE;
658 } else {
659 if (seen_reg_num < 6) {
660 seen_reg_num++;
661 } else {
662 seen_stack_size += 8;
667 loc -= 16;
668 /* movl $0x????????, -0x10(%rbp) */
669 o(0xf045c7);
670 gen_le32(seen_reg_num * 8);
671 /* movl $0x????????, -0xc(%rbp) */
672 o(0xf445c7);
673 gen_le32(seen_sse_num * 16 + 48);
674 /* movl $0x????????, -0x8(%rbp) */
675 o(0xf845c7);
676 gen_le32(seen_stack_size);
678 /* save all register passing arguments */
/* each SSE register gets a 16-byte slot: the low 8 bytes hold the
   register, the high 8 bytes are zeroed; registers are written from
   number 7 down to 0 */
679 for (i = 0; i < 8; i++) {
680 loc -= 16;
681 o(0xd60f66); /* movq */
682 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
683 /* movq $0, loc+8(%rbp) */
684 o(0x85c748);
685 gen_le32(loc + 8);
686 gen_le32(0);
/* integer argument registers, last one (arg_regs[5]) first */
688 for (i = 0; i < 6; i++) {
689 push_arg_reg(5 - i);
693 sym = func_type->ref;
694 param_index = 0;
695 reg_param_index = 0;
696 sse_param_index = 0;
698 /* if the function returns a structure, then add an
699 implicit pointer parameter */
700 func_vt = sym->type;
701 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
/* the hidden pointer arrives in the first integer argument register;
   its spill slot is remembered in func_vc */
702 push_arg_reg(reg_param_index);
703 param_addr = loc;
705 func_vc = loc;
706 param_index++;
707 reg_param_index++;
709 /* define parameters */
710 while ((sym = sym->next) != NULL) {
711 type = &sym->type;
712 size = type_size(type, &align);
713 size = (size + 3) & ~3;
714 if (is_sse_float(type->t)) {
715 if (sse_param_index < 8) {
716 /* save arguments passed by register */
717 loc -= 8;
718 o(0xd60f66); /* movq */
719 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
720 param_addr = loc;
721 } else {
722 param_addr = addr;
723 addr += size;
725 sse_param_index++;
726 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
727 (type->t & VT_BTYPE) == VT_LDOUBLE) {
/* structs and long doubles are always taken from the caller's stack */
728 param_addr = addr;
729 addr += size;
730 } else {
731 if (reg_param_index < 6) {
732 /* save arguments passed by register */
733 push_arg_reg(reg_param_index);
734 param_addr = loc;
735 } else {
736 param_addr = addr;
737 addr += 8;
739 reg_param_index++;
741 sym_push(sym->v & ~SYM_FIELD, type,
742 VT_LOCAL | VT_LVAL, param_addr);
743 param_index++;
747 /* generate function epilog */
/* Emits "leave; ret" (or "ret n" when func_ret_sub is non-zero), then
   temporarily rewinds 'ind' to the FUNC_PROLOG_SIZE bytes reserved at
   func_sub_sp_offset and writes the frame setup code there, now that the
   final local size ('loc') is known.  'ind' is restored afterwards. */
748 void gfunc_epilog(void)
750 int v, saved_ind;
752 o(0xc9); /* leave */
753 if (func_ret_sub == 0) {
754 o(0xc3); /* ret */
755 } else {
756 o(0xc2); /* ret n */
757 g(func_ret_sub);
758 g(func_ret_sub >> 8);
760 /* align local size to word & save local variables */
/* -loc is the number of bytes of locals; round it up to a multiple of 16 */
761 v = (-loc + 15) & -16;
762 saved_ind = ind;
763 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
764 #ifdef TCC_TARGET_PE
765 if (v >= 4096) {
766 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
767 oad(0xb8, v); /* mov stacksize, %eax */
768 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
769 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
770 } else
771 #endif
/* 4 + 3 + 4 = 11 bytes, matching the non-PE FUNC_PROLOG_SIZE */
773 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
774 o(0xec8148); /* sub rsp, stacksize */
775 gen_le32(v);
776 #if FUNC_PROLOG_SIZE == 12
777 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
778 #endif
780 ind = saved_ind;
/* generate a jump to a label: emits opcode 0xe9 with 't' stored in its
   32-bit field and returns the offset of that field, so the jump can be
   linked into a chain and patched later. */
int gjmp(int t)
{
    int field_pos = psym(0xe9, t);

    return field_pos;
}
789 /* generate a jump to a fixed address */
790 void gjmp_addr(int a)
792 int r;
793 r = a - ind - 2;
794 if (r == (char)r) {
795 g(0xeb);
796 g(r);
797 } else {
798 oad(0xe9, a - ind - 5);
802 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* 't' is the head of an existing jump chain; the returned value is the new
   head after the conditional jump generated here (if any) has been linked
   in.  Three cases are handled: flags already set (VT_CMP), a pending
   jump chain from && / || (VT_JMP / VT_JMPI), and an ordinary value that
   is compared against zero. */
803 int gtst(int inv, int t)
805 int v, *p;
807 v = vtop->r & VT_VALMASK;
808 if (v == VT_CMP) {
809 /* fast case : can jump directly since flags are set */
/* the second opcode byte is derived from the stored condition in
   vtop->c.i; xor with 'inv' flips the condition */
810 g(0x0f);
811 t = psym((vtop->c.i - 16) ^ inv, t);
812 } else if (v == VT_JMP || v == VT_JMPI) {
813 /* && or || optimization */
814 if ((v & 1) == inv) {
815 /* insert vtop->c jump list in t */
/* walk to the end of the vtop->c.i chain and append 't' there */
816 p = &vtop->c.i;
817 while (*p != 0)
818 p = (int *)(cur_text_section->data + *p);
819 *p = t;
820 t = vtop->c.i;
821 } else {
822 t = gjmp(t);
823 gsym(vtop->c.i);
825 } else {
826 /* XXX: not tested */
827 if (is_float(vtop->type.t) ||
828 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
829 vpushi(0);
830 gen_op(TOK_NE);
832 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
833 /* constant jmp optimization */
834 if ((vtop->c.i != 0) != inv)
835 t = gjmp(t);
836 } else {
/* test v,v then jne (0x0f 0x85) or, with inv set, je (0x0f 0x84) */
837 v = gv(RC_INT);
838 o(0x85);
839 o(0xc0 + v * 9);
840 g(0x0f);
841 t = psym(0x85 ^ inv, t);
844 vtop--;
845 return t;
848 /* generate an integer binary operation */
/* Operates on the two top value-stack entries (vtop[-1] op vtop[0]) and
   leaves one result entry.  The switch has four shared code paths:
   - gen_op8: add/or/adc/sbb/and/sub/xor/cmp, selected by 'opc' (0..7);
     comparisons arrive through 'default' with opc = 7 and leave a VT_CMP
     result instead of a register;
   - '*': two-operand imul;
   - gen_shift: shl/shr/sar by a constant or by %cl;
   - division, modulo and TOK_UMULL, which use rax/rdx. */
849 void gen_opi(int op)
851 int r, fr, opc, c;
853 switch(op) {
854 case '+':
855 case TOK_ADDC1: /* add with carry generation */
856 opc = 0;
857 gen_op8:
/* the immediate form is only used when the constant operand itself is
   not a 64-bit type */
858 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
859 !is64_type(vtop->type.t)) {
860 /* constant case */
861 vswap();
862 r = gv(RC_INT);
863 if (is64_type(vtop->type.t)) {
864 o(0x48 | REX_BASE(r));
866 vswap();
867 c = vtop->c.i;
868 if (c == (char)c) {
869 /* XXX: generate inc and dec for smaller code ? */
870 o(0x83);
871 o(0xc0 | (opc << 3) | REG_VALUE(r));
872 g(c);
873 } else {
874 o(0x81);
875 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
877 } else {
878 gv2(RC_INT, RC_INT);
879 r = vtop[-1].r;
880 fr = vtop[0].r;
/* REX.W is emitted for every operation except a compare (opc == 7)
   between two operands that are neither 64-bit nor unsigned */
881 if (opc != 7 ||
882 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
883 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
884 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
886 o((opc << 3) | 0x01);
887 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
889 vtop--;
890 if (op >= TOK_ULT && op <= TOK_GT) {
/* comparison: the result lives in the flags, 'op' is kept as condition */
891 vtop->r = VT_CMP;
892 vtop->c.i = op;
894 break;
895 case '-':
896 case TOK_SUBC1: /* sub with carry generation */
897 opc = 5;
898 goto gen_op8;
899 case TOK_ADDC2: /* add with carry use */
900 opc = 2;
901 goto gen_op8;
902 case TOK_SUBC2: /* sub with carry use */
903 opc = 3;
904 goto gen_op8;
905 case '&':
906 opc = 4;
907 goto gen_op8;
908 case '^':
909 opc = 6;
910 goto gen_op8;
911 case '|':
912 opc = 1;
913 goto gen_op8;
914 case '*':
915 gv2(RC_INT, RC_INT);
916 r = vtop[-1].r;
917 fr = vtop[0].r;
918 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
919 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
920 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
922 vtop--;
923 o(0xaf0f); /* imul fr, r */
924 o(0xc0 + fr + r * 8);
925 break;
926 case TOK_SHL:
927 opc = 4;
928 goto gen_shift;
929 case TOK_SHR:
930 opc = 5;
931 goto gen_shift;
932 case TOK_SAR:
933 opc = 7;
934 gen_shift:
/* turn the 3-bit operation number into a register-direct modrm base */
935 opc = 0xc0 | (opc << 3);
936 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
937 /* constant case */
938 vswap();
939 r = gv(RC_INT);
/* the shift count is masked to 6 bits for long long, 5 bits otherwise */
940 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
941 o(0x48 | REX_BASE(r));
942 c = 0x3f;
943 } else {
944 c = 0x1f;
946 vswap();
947 c &= vtop->c.i;
948 o(0xc1); /* shl/shr/sar $xxx, r */
949 o(opc | r);
950 g(c);
951 } else {
952 /* we generate the shift in ecx */
953 gv2(RC_INT, RC_RCX);
954 r = vtop[-1].r;
955 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
956 o(0x48 | REX_BASE(r));
958 o(0xd3); /* shl/shr/sar %cl, r */
959 o(opc | r);
961 vtop--;
962 break;
963 case '/':
964 case TOK_UDIV:
965 case TOK_PDIV:
966 case '%':
967 case TOK_UMOD:
968 case TOK_UMULL:
969 /* first operand must be in eax */
970 /* XXX: need better constraint for second operand */
971 gv2(RC_RAX, RC_RCX);
972 r = vtop[-1].r;
973 fr = vtop[0].r;
974 vtop--;
/* rdx is overwritten by mul/div, so spill whatever it currently holds */
975 save_reg(TREG_RDX);
976 if (op == TOK_UMULL) {
977 o(0xf7); /* mul fr */
978 o(0xe0 + fr);
979 vtop->r2 = TREG_RDX;
980 r = TREG_RAX;
981 } else {
982 if (op == TOK_UDIV || op == TOK_UMOD) {
/* NOTE(review): no REX.W prefix is emitted on this unsigned path, so
   the division is 32-bit even for 64-bit operands - confirm intended */
983 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
984 o(0xf0 + fr);
985 } else {
/* NOTE(review): this is a bitwise AND with VT_LLONG, unlike the
   '== VT_LLONG' comparisons used in the shift cases above; other basic
   types sharing bits with VT_LLONG would also take the 64-bit path -
   confirm whether that is intended */
986 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
987 o(0x9948); /* cqto */
988 o(0x48 + REX_BASE(fr));
989 } else {
990 o(0x99); /* cltd */
992 o(0xf7); /* idiv fr, %eax */
993 o(0xf8 + fr);
/* remainder is taken from rdx, quotient from rax */
995 if (op == '%' || op == TOK_UMOD)
996 r = TREG_RDX;
997 else
998 r = TREG_RAX;
1000 vtop->r = r;
1001 break;
1002 default:
/* every other operator is treated as a comparison (cmp, opc 7) */
1003 opc = 7;
1004 goto gen_op8;
/* Long long operations need no separate lowering on this target: they are
   forwarded unchanged to the integer operation generator. */
void gen_opl(int op)
{
    int forwarded_op = op;

    gen_opi(forwarded_op);
}
1013 /* generate a floating point operation 'v = t1 op t2' instruction. The
1014 two operands are guaranteed to have the same floating point type */
1015 /* XXX: need to use ST1 too */
/* Long double operands are handled with x87 instructions (register class
   RC_ST0), float and double with SSE instructions (RC_FLOAT).  Comparison
   operators leave a VT_CMP result whose condition is stored in vtop->c.i;
   arithmetic operators leave the result in the floating point register.
   'swapped' records that the two stack entries were exchanged so that
   non-commutative operations can compensate. */
1016 void gen_opf(int op)
1018 int a, ft, fc, swapped, r;
1019 int float_type =
1020 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1022 /* convert constants to memory references */
1023 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1024 vswap();
1025 gv(float_type);
1026 vswap();
1028 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1029 gv(float_type);
1031 /* must put at least one value in the floating point register */
1032 if ((vtop[-1].r & VT_LVAL) &&
1033 (vtop[0].r & VT_LVAL)) {
1034 vswap();
1035 gv(float_type);
1036 vswap();
1038 swapped = 0;
1039 /* swap the stack if needed so that t1 is the register and t2 is
1040 the memory reference */
1041 if (vtop[-1].r & VT_LVAL) {
1042 vswap();
1043 swapped = 1;
1045 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1046 if (op >= TOK_ULT && op <= TOK_GT) {
1047 /* load on stack second operand */
1048 load(TREG_ST0, vtop);
1049 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1050 if (op == TOK_GE || op == TOK_GT)
1051 swapped = !swapped;
1052 else if (op == TOK_EQ || op == TOK_NE)
1053 swapped = 0;
1054 if (swapped)
1055 o(0xc9d9); /* fxch %st(1) */
1056 o(0xe9da); /* fucompp */
1057 o(0xe0df); /* fnstsw %ax */
/* the x87 status bits now in %ah are reduced to a plain EQ/NE test;
   'op' is rewritten to the condition that the following flag test
   leaves behind */
1058 if (op == TOK_EQ) {
1059 o(0x45e480); /* and $0x45, %ah */
1060 o(0x40fC80); /* cmp $0x40, %ah */
1061 } else if (op == TOK_NE) {
1062 o(0x45e480); /* and $0x45, %ah */
1063 o(0x40f480); /* xor $0x40, %ah */
1064 op = TOK_NE;
1065 } else if (op == TOK_GE || op == TOK_LE) {
1066 o(0x05c4f6); /* test $0x05, %ah */
1067 op = TOK_EQ;
1068 } else {
1069 o(0x45c4f6); /* test $0x45, %ah */
1070 op = TOK_EQ;
1072 vtop--;
1073 vtop->r = VT_CMP;
1074 vtop->c.i = op;
1075 } else {
1076 /* no memory reference possible for long double operations */
1077 load(TREG_ST0, vtop);
1078 swapped = !swapped;
/* 'a' selects the x87 operation; for '-' and '/' the next value (a + 1)
   is used when the operands ended up swapped */
1080 switch(op) {
1081 default:
1082 case '+':
1083 a = 0;
1084 break;
1085 case '-':
1086 a = 4;
1087 if (swapped)
1088 a++;
1089 break;
1090 case '*':
1091 a = 1;
1092 break;
1093 case '/':
1094 a = 6;
1095 if (swapped)
1096 a++;
1097 break;
/* NOTE(review): ft and fc are assigned here but not read again on this
   long double path */
1099 ft = vtop->type.t;
1100 fc = vtop->c.ul;
1101 o(0xde); /* fxxxp %st, %st(1) */
1102 o(0xc1 + (a << 3));
1103 vtop--;
1105 } else {
1106 if (op >= TOK_ULT && op <= TOK_GT) {
1107 /* if saved lvalue, then we must reload it */
1108 r = vtop->r;
1109 fc = vtop->c.ul;
1110 if ((r & VT_VALMASK) == VT_LLOCAL) {
1111 SValue v1;
1112 r = get_reg(RC_INT);
1113 v1.type.t = VT_INT;
1114 v1.r = VT_LOCAL | VT_LVAL;
1115 v1.c.ul = fc;
1116 load(r, &v1);
1117 fc = 0;
/* EQ/NE keep their operator; the ordered comparisons are mapped onto
   setae/seta, exchanging the operand order for LE/LT */
1120 if (op == TOK_EQ || op == TOK_NE) {
1121 swapped = 0;
1122 } else {
1123 if (op == TOK_LE || op == TOK_LT)
1124 swapped = !swapped;
1125 if (op == TOK_LE || op == TOK_GE) {
1126 op = 0x93; /* setae */
1127 } else {
1128 op = 0x97; /* seta */
1132 if (swapped) {
/* bring the memory operand into xmm1 so the comparison can be done
   register against register */
1133 o(0x7e0ff3); /* movq */
1134 gen_modrm(1, r, vtop->sym, fc);
1136 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1137 o(0x66);
1139 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1140 o(0xc8);
1141 } else {
1142 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1143 o(0x66);
1145 o(0x2e0f); /* ucomisd */
1146 gen_modrm(0, r, vtop->sym, fc);
1149 vtop--;
1150 vtop->r = VT_CMP;
1151 vtop->c.i = op;
1152 } else {
1153 /* no memory reference possible for long double operations */
/* NOTE(review): this test sits inside the branch taken when the type
   is not long double, so it looks always false here - confirm */
1154 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1155 load(TREG_XMM0, vtop);
1156 swapped = !swapped;
/* 'a' is added to opcode 0x58 below to select the SSE operation */
1158 switch(op) {
1159 default:
1160 case '+':
1161 a = 0;
1162 break;
1163 case '-':
1164 a = 4;
1165 break;
1166 case '*':
1167 a = 1;
1168 break;
1169 case '/':
1170 a = 6;
1171 break;
1173 ft = vtop->type.t;
1174 fc = vtop->c.ul;
1175 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1176 o(0xde); /* fxxxp %st, %st(1) */
1177 o(0xc1 + (a << 3));
1178 } else {
1179 /* if saved lvalue, then we must reload it */
1180 r = vtop->r;
1181 if ((r & VT_VALMASK) == VT_LLOCAL) {
1182 SValue v1;
1183 r = get_reg(RC_INT);
1184 v1.type.t = VT_INT;
1185 v1.r = VT_LOCAL | VT_LVAL;
1186 v1.c.ul = fc;
1187 load(r, &v1);
1188 fc = 0;
1190 if (swapped) {
/* operands are in the wrong order: park xmm0 in xmm1, load the memory
   operand into xmm0, then compute xmm0 = xmm0 op xmm1 */
1191 /* movq %xmm0,%xmm1 */
1192 o(0x7e0ff3);
1193 o(0xc8);
1194 load(TREG_XMM0, vtop);
1195 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
1196 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1197 o(0xf2);
1198 } else {
1199 o(0xf3);
1201 o(0x0f);
1202 o(0x58 + a);
1203 o(0xc1);
1204 } else {
/* prefix 0xf2 selects the double form, 0xf3 the float form */
1205 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1206 o(0xf2);
1207 } else {
1208 o(0xf3);
1210 o(0x0f);
1211 o(0x58 + a);
1212 gen_modrm(0, r, vtop->sym, fc);
1215 vtop--;
1220 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1221 and 'long long' cases. */
/* For a long double target the integer is pushed on the machine stack and
   loaded with fild, leaving the result in st0; for float/double targets a
   cvtsi2ss/cvtsi2sd leaves the result in xmm0.  In both cases vtop->r is
   updated to the register that now holds the value. */
1222 void gen_cvt_itof(int t)
1224 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1225 save_reg(TREG_ST0);
1226 gv(RC_INT);
1227 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1228 /* signed long long to float/double/long double (unsigned case
1229 is handled generically) */
1230 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1231 o(0x242cdf); /* fildll (%rsp) */
1232 o(0x08c48348); /* add $8, %rsp */
1233 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1234 (VT_INT | VT_UNSIGNED)) {
1235 /* unsigned int to float/double/long double */
/* an extra zero quadword is pushed before the value and both are
   released afterwards (add $16) */
1236 o(0x6a); /* push $0 */
1237 g(0x00);
1238 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1239 o(0x242cdf); /* fildll (%rsp) */
1240 o(0x10c48348); /* add $16, %rsp */
1241 } else {
1242 /* int to float/double/long double */
1243 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1244 o(0x2404db); /* fildl (%rsp) */
1245 o(0x08c48348); /* add $8, %rsp */
1247 vtop->r = TREG_ST0;
1248 } else {
1249 save_reg(TREG_XMM0);
1250 gv(RC_INT);
/* prefix 0xf2 for a double target, 0xf3 when the target is float */
1251 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
/* unsigned int and long long sources use the 64-bit (REX.W) form */
1252 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1253 (VT_INT | VT_UNSIGNED) ||
1254 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1255 o(0x48); /* REX */
1257 o(0x2a0f);
1258 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1259 vtop->r = TREG_XMM0;
1263 /* convert from one floating point type to another */
/* 't' is the destination type; the source type is taken from vtop.
   float <-> double conversions stay in xmm0.  Conversions to or from
   long double go through scratch memory at -0x10(%rsp) to move the value
   between xmm0 and st0, and vtop->r is updated to the new register.
   When source and destination have the same basic type only the gv()
   load is performed. */
1264 void gen_cvt_ftof(int t)
1266 int ft, bt, tbt;
1268 ft = vtop->type.t;
1269 bt = ft & VT_BTYPE;
1270 tbt = t & VT_BTYPE;
1272 if (bt == VT_FLOAT) {
1273 gv(RC_FLOAT);
1274 if (tbt == VT_DOUBLE) {
1275 o(0xc0140f); /* unpcklps */
1276 o(0xc05a0f); /* cvtps2pd */
1277 } else if (tbt == VT_LDOUBLE) {
1278 /* movss %xmm0,-0x10(%rsp) */
1279 o(0x44110ff3);
1280 o(0xf024);
1281 o(0xf02444d9); /* flds -0x10(%rsp) */
1282 vtop->r = TREG_ST0;
1284 } else if (bt == VT_DOUBLE) {
1285 gv(RC_FLOAT);
1286 if (tbt == VT_FLOAT) {
1287 o(0xc0140f66); /* unpcklpd */
1288 o(0xc05a0f66); /* cvtpd2ps */
1289 } else if (tbt == VT_LDOUBLE) {
1290 /* movsd %xmm0,-0x10(%rsp) */
1291 o(0x44110ff2);
1292 o(0xf024);
1293 o(0xf02444dd); /* fldl -0x10(%rsp) */
1294 vtop->r = TREG_ST0;
1296 } else {
/* any other source type is handled as long double in st0 */
1297 gv(RC_ST0);
1298 if (tbt == VT_DOUBLE) {
1299 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1300 /* movsd -0x10(%rsp),%xmm0 */
1301 o(0x44100ff2);
1302 o(0xf024);
1303 vtop->r = TREG_XMM0;
1304 } else if (tbt == VT_FLOAT) {
1305 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1306 /* movss -0x10(%rsp),%xmm0 */
1307 o(0x44100ff3);
1308 o(0xf024);
1309 vtop->r = TREG_XMM0;
1314 /* convert fp to int 't' type */
1315 void gen_cvt_ftoi(int t)
1317 int ft, bt, size, r;
1318 ft = vtop->type.t;
1319 bt = ft & VT_BTYPE;
1320 if (bt == VT_LDOUBLE) {
1321 gen_cvt_ftof(VT_DOUBLE);
1322 bt = VT_DOUBLE;
1325 gv(RC_FLOAT);
1326 if (t != VT_INT)
1327 size = 8;
1328 else
1329 size = 4;
1331 r = get_reg(RC_INT);
1332 if (bt == VT_FLOAT) {
1333 o(0xf3);
1334 } else if (bt == VT_DOUBLE) {
1335 o(0xf2);
1336 } else {
1337 assert(0);
1339 if (size == 8) {
1340 o(0x48 + REX_BASE(r));
1342 o(0x2c0f); /* cvttss2si or cvttsd2si */
1343 o(0xc0 + (REG_VALUE(r) << 3));
1344 vtop->r = r;
1347 /* computed goto support */
1348 void ggoto(void)
1350 gcall_or_jmp(1);
1351 vtop--;
1354 /* end of x86-64 code generator */
1355 /*************************************************************/