Fix silly typos in the previous change.
[tinycc.git] / x86_64-gen.c
blob1227f41f06da09c2fa1cba22e633b653642b3ff1
1 /*
2 * x86-64 code generator for TCC
4 * Copyright (c) 2008 Shinichiro Hamaji
6 * Based on i386-gen.c by Fabrice Bellard
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <assert.h>
/* number of available registers */
#define NB_REGS 5

/* A register may be a member of several classes at once.  The class
   bits are ordered from the most general to the most specific; gv2()
   relies on that ordering. */
#define RC_INT   0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_RAX   0x0004
#define RC_RCX   0x0008
#define RC_RDX   0x0010
#define RC_XMM0  0x0020
#define RC_ST0   0x0040 /* only for long double */
#define RC_IRET  RC_RAX  /* function return: integer register */
#define RC_LRET  RC_RDX  /* function return: second integer register */
#define RC_FRET  RC_XMM0 /* function return: float register */

/* Symbolic register numbers.  Values 0..NB_REGS-1 index reg_classes[];
   the integer registers additionally use numbers whose low three bits
   and bit 3 are split apart by REG_VALUE() and REX_BASE(). */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSI = 6,
    TREG_RDI = 7,
    TREG_R8 = 8,
    TREG_R9 = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 3,
    TREG_ST0 = 4,

    /* flag or-ed into a register number (see load()) */
    TREG_MEM = 0x10,
};

/* bit 3 of a register number, i.e. the bit that goes into a REX prefix */
#define REX_BASE(reg) (((reg) >> 3) & 1)
/* low three bits of a register number, i.e. the ModRM/opcode field value */
#define REG_VALUE(reg) ((reg) & 7)

/* class membership of each allocatable register, indexed by TREG_xxx */
int reg_classes[NB_REGS] = {
    /* eax */  RC_INT | RC_RAX,
    /* ecx */  RC_INT | RC_RCX,
    /* edx */  RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* st0 */  RC_ST0,
};
/* registers holding a function's return value */
#define REG_IRET TREG_RAX  /* single word int return register */
#define REG_LRET TREG_RDX  /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 8
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* data relocation type; the macro keeps its "32" name although on this
   target it maps to the 64 bit relocation R_X86_64_64 */
#define R_DATA_32  R_X86_64_64
#define R_JMP_SLOT R_X86_64_JUMP_SLOT
#define R_COPY     R_X86_64_COPY

#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE  0x1000

/******************************************************/

/* code offset just after the space reserved for the prolog; recorded by
   gfunc_prolog() and used by gfunc_epilog() to go back and emit it */
static unsigned long func_sub_sp_offset;
/* operand of the final "ret n"; 0 selects a plain "ret" */
static int func_ret_sub;
106 /* XXX: make it faster ? */
107 void g(int c)
109 int ind1;
110 ind1 = ind + 1;
111 if (ind1 > cur_text_section->data_allocated)
112 section_realloc(cur_text_section, ind1);
113 cur_text_section->data[ind] = c;
114 ind = ind1;
/* Emit the bytes packed in 'c', least significant byte first, stopping
   as soon as the remaining value is zero.  Consequently o(0) emits
   nothing and a sequence cannot end with a zero byte; use g() for
   those. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8) {
        g(c);
    }
}
/* Emit 'c' as four bytes, least significant byte first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        g(c >> shift);
    }
}
/* Emit 'c' as eight bytes, least significant byte first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        g(c >> shift);
    }
}
145 /* output a symbol and patch all calls to it */
146 void gsym_addr(int t, int a)
148 int n, *ptr;
149 while (t) {
150 ptr = (int *)(cur_text_section->data + t);
151 n = *ptr; /* next value */
152 *ptr = a - t - 4;
153 t = n;
157 void gsym(int t)
159 gsym_addr(t, ind);
/* psym emits an instruction whose 4 byte data field is a reference to
   a symbol.  It is simply another name for oad(). */
#define psym oad
166 static int is64_type(int t)
168 return ((t & VT_BTYPE) == VT_PTR ||
169 (t & VT_BTYPE) == VT_FUNC ||
170 (t & VT_BTYPE) == VT_LLONG);
173 static int is_sse_float(int t) {
174 int bt;
175 bt = t & VT_BTYPE;
176 return bt == VT_DOUBLE || bt == VT_FLOAT;
179 /* instruction + 4 bytes data. Return the address of the data */
180 static int oad(int c, int s)
182 int ind1;
184 o(c);
185 ind1 = ind + 4;
186 if (ind1 > cur_text_section->data_allocated)
187 section_realloc(cur_text_section, ind1);
188 *(int *)(cur_text_section->data + ind) = s;
189 s = ind;
190 ind = ind1;
191 return s;
194 /* output constant with relocation if 'r & VT_SYM' is true */
195 static void gen_addr64(int r, Sym *sym, int64_t c)
197 if (r & VT_SYM)
198 greloc(cur_text_section, sym, ind, R_X86_64_64);
199 gen_le64(c);
202 /* output constant with relocation if 'r & VT_SYM' is true */
203 static void gen_addrpc32(int r, Sym *sym, int c)
205 if (r & VT_SYM)
206 greloc(cur_text_section, sym, ind, R_X86_64_PC32);
207 gen_le32(c-4);
210 /* output got address with relocation */
211 static void gen_gotpcrel(int r, Sym *sym, int c)
213 Section *sr;
214 ElfW(Rela) *rel;
215 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
216 sr = cur_text_section->reloc;
217 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
218 rel->r_addend = -4;
219 gen_le32(0);
221 if (c) {
222 /* we use add c, %xxx for displacement */
223 o(0x48 + REX_BASE(r));
224 o(0x81);
225 o(0xc0 + REG_VALUE(r));
226 gen_le32(c);
230 static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
232 op_reg = REG_VALUE(op_reg) << 3;
233 if ((r & VT_VALMASK) == VT_CONST) {
234 /* constant memory reference */
235 o(0x05 | op_reg);
236 if (is_got) {
237 gen_gotpcrel(r, sym, c);
238 } else {
239 gen_addrpc32(r, sym, c);
241 } else if ((r & VT_VALMASK) == VT_LOCAL) {
242 /* currently, we use only ebp as base */
243 if (c == (char)c) {
244 /* short reference */
245 o(0x45 | op_reg);
246 g(c);
247 } else {
248 oad(0x85 | op_reg, c);
250 } else if ((r & VT_VALMASK) >= TREG_MEM) {
251 if (c) {
252 g(0x80 | op_reg | REG_VALUE(r));
253 gen_le32(c);
254 } else {
255 g(0x00 | op_reg | REG_VALUE(r));
257 } else {
258 g(0x00 | op_reg | (r & VT_VALMASK));
262 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
263 opcode bits */
264 static void gen_modrm(int op_reg, int r, Sym *sym, int c)
266 gen_modrm_impl(op_reg, r, sym, c, 0);
269 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
270 opcode bits */
271 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
273 int is_got;
274 int rex = 0x48 | (REX_BASE(op_reg) << 2);
275 if ((r & VT_VALMASK) != VT_CONST &&
276 (r & VT_VALMASK) != VT_LOCAL) {
277 rex |= REX_BASE(VT_VALMASK & r);
279 o(rex);
280 o(opcode);
281 is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
282 gen_modrm_impl(op_reg, r, sym, c, is_got);
286 /* load 'r' from value 'sv' */
/* Emits the instructions that bring the value described by 'sv' into
   register 'r'.  Two main cases are handled:
     - 'sv' is an lvalue (VT_LVAL): a memory load whose opcode is chosen
       from the type in sv->type.t;
     - otherwise: a constant, the address of a local, a pending
       comparison (VT_CMP), a pending jump list (VT_JMP/VT_JMPI), or a
       value held in another register.
   Nothing is returned; the generated bytes go to the current text
   section through o()/g()/oad(). */
287 void load(int r, SValue *sv)
289 int v, t, ft, fc, fr;
290 SValue v1;
292 fr = sv->r;
293 ft = sv->type.t;
294 fc = sv->c.ul;
296 /* we use indirect access via got */
/* Only taken for an lvalue that names a symbol which is not VT_STATIC:
   a 0x8b (mov) is emitted first with TREG_MEM set in the destination,
   which makes gen_modrm64() use its GOT path; the real load then goes
   through that register. */
297 if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
298 (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
299 /* use the result register as a temporary register */
300 int tr = r | TREG_MEM;
301 if (is_float(ft)) {
302 /* we cannot use float registers as a temporary register */
303 tr = get_reg(RC_INT) | TREG_MEM;
305 gen_modrm64(0x8b, tr, fr, sv->sym, 0);
307 /* load from the temporary register */
308 fr = tr | VT_LVAL;
311 v = fr & VT_VALMASK;
312 if (fr & VT_LVAL) {
/* VT_LLOCAL: the local slot holds a pointer to the value.  The pointer
   is first loaded into 'r' by a recursive call, then 'r' is used as
   the base of the actual load. */
313 if (v == VT_LLOCAL) {
314 v1.type.t = VT_PTR;
315 v1.r = VT_LOCAL | VT_LVAL;
316 v1.c.ul = fc;
317 load(r, &v1);
318 fr = r;
/* Pick the opcode from the type.  For float/double 'r' is replaced by
   0 and for long double by 5: from here on 'r' is only the value put
   in the ModRM reg field by gen_modrm(), no longer a TREG_xxx number. */
320 if ((ft & VT_BTYPE) == VT_FLOAT) {
321 o(0x6e0f66); /* movd */
322 r = 0;
323 } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
324 o(0x7e0ff3); /* movq */
325 r = 0;
326 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
327 o(0xdb); /* fldt */
328 r = 5;
329 } else if ((ft & VT_TYPE) == VT_BYTE) {
330 o(0xbe0f); /* movsbl */
331 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
332 o(0xb60f); /* movzbl */
333 } else if ((ft & VT_TYPE) == VT_SHORT) {
334 o(0xbf0f); /* movswl */
335 } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
336 o(0xb70f); /* movzwl */
337 } else if (is64_type(ft)) {
/* 64 bit types need the REX prefix, which gen_modrm64() emits together
   with the opcode and the modrm bytes, hence the early return. */
338 gen_modrm64(0x8b, r, fr, sv->sym, fc);
339 return;
340 } else {
341 o(0x8b); /* movl */
343 gen_modrm(r, fr, sv->sym, fc);
344 } else {
345 if (v == VT_CONST) {
346 if ((ft & VT_BTYPE) == VT_LLONG) {
/* 64 bit immediate; symbol references are not expected here */
347 assert(!(fr & VT_SYM));
348 o(0x48);
349 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
350 gen_addr64(fr, sv->sym, sv->c.ull);
351 } else {
352 if (fr & VT_SYM) {
/* address of a symbol: lea for VT_STATIC symbols, otherwise the
   address is read through the GOT */
353 if (sv->sym->type.t & VT_STATIC) {
354 o(0x8d48);
355 o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
356 gen_addrpc32(fr, sv->sym, fc);
357 } else {
358 o(0x8b48);
359 o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
360 gen_gotpcrel(r, sv->sym, fc);
362 } else {
363 o(0xb8 + REG_VALUE(r)); /* mov $xx, r */
364 gen_le32(fc);
367 } else if (v == VT_LOCAL) {
368 o(0x48 | REX_BASE(r));
369 o(0x8d); /* lea xxx(%ebp), r */
370 gen_modrm(r, VT_LOCAL, sv->sym, fc);
371 } else if (v == VT_CMP) {
/* materialize a pending comparison: 'fc' is emitted as the second
   opcode byte of the setxx instruction */
372 oad(0xb8 + r, 0); /* mov $0, r */
373 o(0x0f); /* setxx %br */
374 o(fc);
375 o(0xc0 + r);
376 } else if (v == VT_JMP || v == VT_JMPI) {
/* materialize a pending jump list: the fall-through path loads
   't' (= v & 1) and skips the next 5 bytes; the jumps chained at 'fc'
   are resolved to land on the second mov, which loads 't ^ 1' */
377 t = v & 1;
378 oad(0xb8 + r, t); /* mov $1, r */
379 o(0x05eb); /* jmp after */
380 gsym(fc);
381 oad(0xb8 + r, t ^ 1); /* mov $0, r */
382 } else if (v != r) {
/* register to register move; xmm0 <-> st0 goes through a scratch slot
   at -0x10(%rsp) */
383 if (r == TREG_XMM0) {
384 assert(v == TREG_ST0);
385 /* gen_cvt_ftof(VT_DOUBLE); */
386 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
387 /* movsd -0x10(%rsp),%xmm0 */
388 o(0x44100ff2);
389 o(0xf024);
390 } else if (r == TREG_ST0) {
391 assert(v == TREG_XMM0);
392 /* gen_cvt_ftof(VT_LDOUBLE); */
393 /* movsd %xmm0,-0x10(%rsp) */
394 o(0x44110ff2);
395 o(0xf024);
396 o(0xf02444dd); /* fldl -0x10(%rsp) */
397 } else {
398 o(0x48 | REX_BASE(r) | (REX_BASE(v) << 2));
399 o(0x89);
400 o(0xc0 + r + v * 8); /* mov v, r */
406 /* store register 'r' in lvalue 'v' */
/* Emits the instruction that writes register 'r' to the location
   described by 'v'.  The opcode is selected from the basic type of
   'v'.  When the destination is a symbol, its address is first loaded
   into %r11 through the GOT and the store is done relative to %r11. */
407 void store(int r, SValue *v)
409 int fr, bt, ft, fc;
/* opcode of a 64 bit store; when set, the opcode is emitted later so
   that it follows the REX prefix */
410 int op64 = 0;
411 /* store the REX prefix in this variable when PIC is enabled */
412 int pic = 0;
414 ft = v->type.t;
415 fc = v->c.ul;
416 fr = v->r & VT_VALMASK;
417 bt = ft & VT_BTYPE;
419 /* we need to access the variable via got */
420 if (fr == VT_CONST && (v->r & VT_SYM)) {
421 /* mov xx(%rip), %r11 */
422 o(0x1d8b4c);
423 gen_gotpcrel(TREG_R11, v->sym, v->c.ul);
/* 0x49 for 64 bit types, 0x41 otherwise */
424 pic = is64_type(bt) ? 0x49 : 0x41;
/* In the branches below o(pic) emits nothing when pic is 0, because
   o() stops as soon as its argument is zero.  For float/double 'r'
   becomes 0 and for long double 7: from then on it is only the value
   placed in the ModRM reg field. */
427 /* XXX: incorrect if float reg to reg */
428 if (bt == VT_FLOAT) {
429 o(0x66);
430 o(pic);
431 o(0x7e0f); /* movd */
432 r = 0;
433 } else if (bt == VT_DOUBLE) {
434 o(0x66);
435 o(pic);
436 o(0xd60f); /* movq */
437 r = 0;
438 } else if (bt == VT_LDOUBLE) {
439 o(0xc0d9); /* fld %st(0) */
440 o(pic);
441 o(0xdb); /* fstpt */
442 r = 7;
443 } else {
444 if (bt == VT_SHORT)
445 o(0x66);
446 o(pic);
447 if (bt == VT_BYTE || bt == VT_BOOL)
448 o(0x88);
449 else if (is64_type(bt))
450 op64 = 0x89;
451 else
452 o(0x89);
454 if (pic) {
455 /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
456 if (op64)
457 o(op64);
/* ModRM byte: reg field = r, rm field = 3 (low bits of register 11) */
458 o(3 + (r << 3));
459 } else if (op64) {
460 if (fr == VT_CONST ||
461 fr == VT_LOCAL ||
462 (v->r & VT_LVAL)) {
463 gen_modrm64(op64, r, v->r, v->sym, fc);
464 } else if (fr != r) {
465 /* XXX: don't we really come here? */
/* NOTE(review): abort() precedes the o() call, which therefore looks
   unreachable — confirm */
466 abort();
467 o(0xc0 + fr + r * 8); /* mov r, fr */
469 } else {
470 if (fr == VT_CONST ||
471 fr == VT_LOCAL ||
472 (v->r & VT_LVAL)) {
473 gen_modrm(r, v->r, v->sym, fc);
474 } else if (fr != r) {
475 /* XXX: don't we really come here? */
/* NOTE(review): same as above, the o() after abort() looks unreachable */
476 abort();
477 o(0xc0 + fr + r * 8); /* mov r, fr */
/* Emit "add $val, %rsp", using the one byte immediate form when 'val'
   fits in a char. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348);
    g(val);
}
492 /* 'is_jmp' is '1' if it is a jump */
493 static void gcall_or_jmp(int is_jmp)
495 int r;
496 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
497 /* constant case */
498 if (vtop->r & VT_SYM) {
499 /* relocation case */
500 greloc(cur_text_section, vtop->sym,
501 ind + 1, R_X86_64_PC32);
502 } else {
503 /* put an empty PC32 relocation */
504 put_elf_reloc(symtab_section, cur_text_section,
505 ind + 1, R_X86_64_PC32, 0);
507 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */
508 } else {
509 /* otherwise, indirect call */
510 r = TREG_R11;
511 load(r, vtop);
512 o(0x41); /* REX */
513 o(0xff); /* call/jmp *r */
514 o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
518 static uint8_t arg_regs[6] = {
519 TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
521 /* Generate function call. The function address is pushed first, then
522 all the parameters in call order. This functions pops all the
523 parameters and the function address. */
524 void gfunc_call(int nb_args)
526 int size, align, r, args_size, i, func_call;
527 Sym *func_sym;
528 SValue *orig_vtop;
529 int nb_reg_args = 0;
530 int nb_sse_args = 0;
531 int sse_reg, gen_reg;
533 /* calculate the number of integer/float arguments */
534 args_size = 0;
535 for(i = 0; i < nb_args; i++) {
536 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
537 args_size += type_size(&vtop->type, &align);
538 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) {
539 args_size += 16;
540 } else if (is_sse_float(vtop[-i].type.t)) {
541 nb_sse_args++;
542 if (nb_sse_args > 8) args_size += 8;
543 } else {
544 nb_reg_args++;
545 if (nb_reg_args > 6) args_size += 8;
549 /* for struct arguments, we need to call memcpy and the function
550 call breaks register passing arguments we are preparing.
551 So, we process arguments which will be passed by stack first. */
552 orig_vtop = vtop;
553 gen_reg = nb_reg_args;
554 sse_reg = nb_sse_args;
555 /* adjust stack to align SSE boundary */
556 if (args_size &= 8) {
557 o(0x50); /* push $rax */
559 for(i = 0; i < nb_args; i++) {
560 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
561 size = type_size(&vtop->type, &align);
562 /* align to stack align size */
563 size = (size + 3) & ~3;
564 /* allocate the necessary size on stack */
565 o(0x48);
566 oad(0xec81, size); /* sub $xxx, %rsp */
567 /* generate structure store */
568 r = get_reg(RC_INT);
569 o(0x48 + REX_BASE(r));
570 o(0x89); /* mov %rsp, r */
571 o(0xe0 + r);
573 /* following code breaks vtop[1] */
574 SValue tmp = vtop[1];
575 vset(&vtop->type, r | VT_LVAL, 0);
576 vswap();
577 vstore();
578 vtop[1] = tmp;
580 args_size += size;
581 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
582 gv(RC_ST0);
583 size = LDOUBLE_SIZE;
584 oad(0xec8148, size); /* sub $xxx, %rsp */
585 o(0x7cdb); /* fstpt 0(%rsp) */
586 g(0x24);
587 g(0x00);
588 args_size += size;
589 } else if (is_sse_float(vtop->type.t)) {
590 int j = --sse_reg;
591 if (j >= 8) {
592 gv(RC_FLOAT);
593 o(0x50); /* push $rax */
594 /* movq %xmm0, (%rsp) */
595 o(0x04d60f66);
596 o(0x24);
597 args_size += 8;
599 } else {
600 int j = --gen_reg;
601 /* simple type */
602 /* XXX: implicit cast ? */
603 if (j >= 6) {
604 r = gv(RC_INT);
605 o(0x50 + r); /* push r */
606 args_size += 8;
609 vtop--;
611 vtop = orig_vtop;
613 /* then, we prepare register passing arguments.
614 Note that we cannot set RDX and RCX in this loop because gv()
615 may break these temporary registers. Let's use R10 and R11
616 instead of them */
617 gen_reg = nb_reg_args;
618 sse_reg = nb_sse_args;
619 for(i = 0; i < nb_args; i++) {
620 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT ||
621 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
622 } else if (is_sse_float(vtop->type.t)) {
623 int j = --sse_reg;
624 if (j < 8) {
625 gv(RC_FLOAT); /* only one float register */
626 /* movaps %xmm0, %xmmN */
627 o(0x280f);
628 o(0xc0 + (sse_reg << 3));
630 } else {
631 int j = --gen_reg;
632 /* simple type */
633 /* XXX: implicit cast ? */
634 if (j < 6) {
635 r = gv(RC_INT);
636 if (j < 2) {
637 o(0x8948); /* mov */
638 o(0xc0 + r * 8 + arg_regs[j]);
639 } else if (j < 4) {
640 o(0x8949); /* mov */
641 /* j=2: r10, j=3: r11 */
642 o(0xc0 + r * 8 + j);
643 } else {
644 o(0x8949); /* mov */
645 /* j=4: r8, j=5: r9 */
646 o(0xc0 + r * 8 + j - 4);
650 vtop--;
653 save_regs(0); /* save used temporary registers */
655 /* Copy R10 and R11 into RDX and RCX, respectively */
656 if (nb_reg_args > 2) {
657 o(0xd2894c); /* mov %r10, %rdx */
658 if (nb_reg_args > 3) {
659 o(0xd9894c); /* mov %r11, %rcx */
663 func_sym = vtop->type.ref;
664 func_call = FUNC_CALL(func_sym->r);
665 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
666 gcall_or_jmp(0);
667 if (args_size)
668 gadd_sp(args_size);
669 vtop--;
/* number of bytes gfunc_prolog() leaves free at the start of a function
   and that gfunc_epilog() fills in afterwards */
#ifndef TCC_TARGET_PE
#define FUNC_PROLOG_SIZE 11
#else
/* XXX: support PE? */
#warning "PE isn't tested at all"
#define FUNC_PROLOG_SIZE 12
#endif
680 static void push_arg_reg(int i) {
681 loc -= 8;
682 gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
685 /* generate function prolog of type 't' */
/* 'func_type' is the type of the function being compiled; its parameter
   list is walked through func_type->ref->next.  The function:
     - skips FUNC_PROLOG_SIZE bytes of output, which gfunc_epilog() later
       fills with the frame setup, and records the position after them
       in func_sub_sp_offset;
     - for variadic functions (FUNC_ELLIPSIS), stores three 32 bit
       counters in the frame and saves all argument registers;
     - gives every parameter a frame offset and declares it with
       sym_push(): register parameters are spilled to fresh local
       slots, stack parameters keep their positive offset from the
       frame base. */
686 void gfunc_prolog(CType *func_type)
688 int i, addr, align, size, func_call;
689 int param_index, param_addr, reg_param_index, sse_param_index;
690 Sym *sym;
691 CType *type;
693 func_ret_sub = 0;
695 sym = func_type->ref;
696 func_call = FUNC_CALL(sym->r);
/* first stack parameter lives above two pointer sized words */
697 addr = PTR_SIZE * 2;
698 loc = 0;
/* leave room for the prolog instructions; they are emitted by
   gfunc_epilog() once the frame size is known */
699 ind += FUNC_PROLOG_SIZE;
700 func_sub_sp_offset = ind;
702 if (func_type->ref->c == FUNC_ELLIPSIS) {
703 int seen_reg_num, seen_sse_num, seen_stack_size;
704 seen_reg_num = seen_sse_num = 0;
705 /* frame pointer and return address */
706 seen_stack_size = PTR_SIZE * 2;
707 /* count the number of seen parameters */
708 sym = func_type->ref;
709 while ((sym = sym->next) != NULL) {
710 type = &sym->type;
711 if (is_sse_float(type->t)) {
712 if (seen_sse_num < 8) {
713 seen_sse_num++;
714 } else {
715 seen_stack_size += 8;
717 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {
718 size = type_size(type, &align);
719 size = (size + 3) & ~3;
720 seen_stack_size += size;
721 } else if ((type->t & VT_BTYPE) == VT_LDOUBLE) {
722 seen_stack_size += LDOUBLE_SIZE;
723 } else {
724 if (seen_reg_num < 6) {
725 seen_reg_num++;
726 } else {
727 seen_stack_size += 8;
/* Three 32 bit values are stored at -0x10, -0xc and -0x8 from the frame
   base: bytes of integer registers consumed by named parameters,
   48 + 16 bytes per SSE register consumed, and the stack offset reached
   by named parameters.  NOTE(review): presumably these are the fields
   the va_start/va_arg implementation reads — confirm against it. */
732 loc -= 16;
733 /* movl $0x????????, -0x10(%rbp) */
734 o(0xf045c7);
735 gen_le32(seen_reg_num * 8);
736 /* movl $0x????????, -0xc(%rbp) */
737 o(0xf445c7);
738 gen_le32(seen_sse_num * 16 + 48);
739 /* movl $0x????????, -0x8(%rbp) */
740 o(0xf845c7);
741 gen_le32(seen_stack_size);
743 /* save all register passing arguments */
/* eight SSE registers, highest number first, each in a 16 byte slot
   whose upper 8 bytes are cleared; then the six integer argument
   registers, last one first */
744 for (i = 0; i < 8; i++) {
745 loc -= 16;
746 o(0xd60f66); /* movq */
747 gen_modrm(7 - i, VT_LOCAL, NULL, loc);
748 /* movq $0, loc+8(%rbp) */
749 o(0x85c748);
750 gen_le32(loc + 8);
751 gen_le32(0);
753 for (i = 0; i < 6; i++) {
754 push_arg_reg(5 - i);
758 sym = func_type->ref;
/* param_index is only incremented in this function; it is not read */
759 param_index = 0;
760 reg_param_index = 0;
761 sse_param_index = 0;
763 /* if the function returns a structure, then add an
764 implicit pointer parameter */
765 func_vt = sym->type;
766 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
/* the hidden pointer arrives in the first integer argument register;
   its spill slot offset is kept in func_vc */
767 push_arg_reg(reg_param_index);
768 param_addr = loc;
770 func_vc = loc;
771 param_index++;
772 reg_param_index++;
774 /* define parameters */
775 while ((sym = sym->next) != NULL) {
776 type = &sym->type;
777 size = type_size(type, &align);
778 size = (size + 3) & ~3;
779 if (is_sse_float(type->t)) {
780 if (sse_param_index < 8) {
781 /* save arguments passed by register */
782 loc -= 8;
783 o(0xd60f66); /* movq */
784 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc);
785 param_addr = loc;
786 } else {
787 param_addr = addr;
788 addr += size;
790 sse_param_index++;
791 } else if ((type->t & VT_BTYPE) == VT_STRUCT ||
792 (type->t & VT_BTYPE) == VT_LDOUBLE) {
/* structs and long doubles are always taken from the stack */
793 param_addr = addr;
794 addr += size;
795 } else {
796 if (reg_param_index < 6) {
797 /* save arguments passed by register */
798 push_arg_reg(reg_param_index);
799 param_addr = loc;
800 } else {
801 param_addr = addr;
802 addr += 8;
804 reg_param_index++;
/* declare the parameter as a local lvalue at the chosen offset */
806 sym_push(sym->v & ~SYM_FIELD, type,
807 VT_LOCAL | VT_LVAL, param_addr);
808 param_index++;
812 /* generate function epilog */
813 void gfunc_epilog(void)
815 int v, saved_ind;
817 o(0xc9); /* leave */
818 if (func_ret_sub == 0) {
819 o(0xc3); /* ret */
820 } else {
821 o(0xc2); /* ret n */
822 g(func_ret_sub);
823 g(func_ret_sub >> 8);
825 /* align local size to word & save local variables */
826 v = (-loc + 15) & -16;
827 saved_ind = ind;
828 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
829 #ifdef TCC_TARGET_PE
830 if (v >= 4096) {
831 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
832 oad(0xb8, v); /* mov stacksize, %eax */
833 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
834 greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
835 } else
836 #endif
838 o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
839 o(0xec8148); /* sub rsp, stacksize */
840 gen_le32(v);
841 #if FUNC_PROLOG_SIZE == 12
842 o(0x90); /* adjust to FUNC_PROLOG_SIZE */
843 #endif
845 ind = saved_ind;
/* Emit a jump (opcode 0xe9) whose 4 byte field is initialized with 't',
   the head of a pending reference chain.  Returns the offset of that
   field, i.e. the new head of the chain. */
int gjmp(int t)
{
    const int field = psym(0xe9, t);

    return field;
}
854 /* generate a jump to a fixed address */
855 void gjmp_addr(int a)
857 int r;
858 r = a - ind - 2;
859 if (r == (char)r) {
860 g(0xeb);
861 g(r);
862 } else {
863 oad(0xe9, a - ind - 5);
867 /* generate a test. set 'inv' to invert test. Stack entry is popped */
868 int gtst(int inv, int t)
870 int v, *p;
872 v = vtop->r & VT_VALMASK;
873 if (v == VT_CMP) {
874 /* fast case : can jump directly since flags are set */
875 g(0x0f);
876 t = psym((vtop->c.i - 16) ^ inv, t);
877 } else if (v == VT_JMP || v == VT_JMPI) {
878 /* && or || optimization */
879 if ((v & 1) == inv) {
880 /* insert vtop->c jump list in t */
881 p = &vtop->c.i;
882 while (*p != 0)
883 p = (int *)(cur_text_section->data + *p);
884 *p = t;
885 t = vtop->c.i;
886 } else {
887 t = gjmp(t);
888 gsym(vtop->c.i);
890 } else {
891 if (is_float(vtop->type.t) ||
892 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
893 vpushi(0);
894 gen_op(TOK_NE);
896 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
897 /* constant jmp optimization */
898 if ((vtop->c.i != 0) != inv)
899 t = gjmp(t);
900 } else {
901 v = gv(RC_INT);
902 o(0x85);
903 o(0xc0 + v * 9);
904 g(0x0f);
905 t = psym(0x85 ^ inv, t);
908 vtop--;
909 return t;
912 /* generate an integer binary operation */
913 void gen_opi(int op)
915 int r, fr, opc, c;
917 switch(op) {
918 case '+':
919 case TOK_ADDC1: /* add with carry generation */
920 opc = 0;
921 gen_op8:
922 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST &&
923 !is64_type(vtop->type.t)) {
924 /* constant case */
925 vswap();
926 r = gv(RC_INT);
927 if (is64_type(vtop->type.t)) {
928 o(0x48 | REX_BASE(r));
930 vswap();
931 c = vtop->c.i;
932 if (c == (char)c) {
933 /* XXX: generate inc and dec for smaller code ? */
934 o(0x83);
935 o(0xc0 | (opc << 3) | REG_VALUE(r));
936 g(c);
937 } else {
938 o(0x81);
939 oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
941 } else {
942 gv2(RC_INT, RC_INT);
943 r = vtop[-1].r;
944 fr = vtop[0].r;
945 if (opc != 7 ||
946 is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
947 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
948 o(0x48 | REX_BASE(r) | (REX_BASE(fr) << 2));
950 o((opc << 3) | 0x01);
951 o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
953 vtop--;
954 if (op >= TOK_ULT && op <= TOK_GT) {
955 vtop->r = VT_CMP;
956 vtop->c.i = op;
958 break;
959 case '-':
960 case TOK_SUBC1: /* sub with carry generation */
961 opc = 5;
962 goto gen_op8;
963 case TOK_ADDC2: /* add with carry use */
964 opc = 2;
965 goto gen_op8;
966 case TOK_SUBC2: /* sub with carry use */
967 opc = 3;
968 goto gen_op8;
969 case '&':
970 opc = 4;
971 goto gen_op8;
972 case '^':
973 opc = 6;
974 goto gen_op8;
975 case '|':
976 opc = 1;
977 goto gen_op8;
978 case '*':
979 gv2(RC_INT, RC_INT);
980 r = vtop[-1].r;
981 fr = vtop[0].r;
982 if (is64_type(vtop[0].type.t) || (vtop[0].type.t & VT_UNSIGNED) ||
983 is64_type(vtop[-1].type.t) || (vtop[-1].type.t & VT_UNSIGNED)) {
984 o(0x48 | REX_BASE(fr) | (REX_BASE(r) << 2));
986 vtop--;
987 o(0xaf0f); /* imul fr, r */
988 o(0xc0 + fr + r * 8);
989 break;
990 case TOK_SHL:
991 opc = 4;
992 goto gen_shift;
993 case TOK_SHR:
994 opc = 5;
995 goto gen_shift;
996 case TOK_SAR:
997 opc = 7;
998 gen_shift:
999 opc = 0xc0 | (opc << 3);
1000 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1001 /* constant case */
1002 vswap();
1003 r = gv(RC_INT);
1004 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1005 o(0x48 | REX_BASE(r));
1006 c = 0x3f;
1007 } else {
1008 c = 0x1f;
1010 vswap();
1011 c &= vtop->c.i;
1012 o(0xc1); /* shl/shr/sar $xxx, r */
1013 o(opc | r);
1014 g(c);
1015 } else {
1016 /* we generate the shift in ecx */
1017 gv2(RC_INT, RC_RCX);
1018 r = vtop[-1].r;
1019 if ((vtop[-1].type.t & VT_BTYPE) == VT_LLONG) {
1020 o(0x48 | REX_BASE(r));
1022 o(0xd3); /* shl/shr/sar %cl, r */
1023 o(opc | r);
1025 vtop--;
1026 break;
1027 case '/':
1028 case TOK_UDIV:
1029 case TOK_PDIV:
1030 case '%':
1031 case TOK_UMOD:
1032 case TOK_UMULL:
1033 /* first operand must be in eax */
1034 /* XXX: need better constraint for second operand */
1035 gv2(RC_RAX, RC_RCX);
1036 r = vtop[-1].r;
1037 fr = vtop[0].r;
1038 vtop--;
1039 save_reg(TREG_RDX);
1040 if (op == TOK_UMULL) {
1041 o(0xf7); /* mul fr */
1042 o(0xe0 + fr);
1043 vtop->r2 = TREG_RDX;
1044 r = TREG_RAX;
1045 } else {
1046 if (op == TOK_UDIV || op == TOK_UMOD) {
1047 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
1048 o(0xf0 + fr);
1049 } else {
1050 if ((vtop->type.t & VT_BTYPE) & VT_LLONG) {
1051 o(0x9948); /* cqto */
1052 o(0x48 + REX_BASE(fr));
1053 } else {
1054 o(0x99); /* cltd */
1056 o(0xf7); /* idiv fr, %eax */
1057 o(0xf8 + fr);
1059 if (op == '%' || op == TOK_UMOD)
1060 r = TREG_RDX;
1061 else
1062 r = TREG_RAX;
1064 vtop->r = r;
1065 break;
1066 default:
1067 opc = 7;
1068 goto gen_op8;
/* Binary operation on long long operands.  On this target it is handled
   by the same code as the plain integer case, so simply forward 'op'. */
void gen_opl(int op)
{
    const int token = op;

    gen_opi(token);
}
1077 /* generate a floating point operation 'v = t1 op t2' instruction. The
1078 two operands are guaranteed to have the same floating point type */
1079 /* XXX: need to use ST1 too */
/* 'op' is the operator token; the operands are vtop[-1] and vtop[0].
   Long double operands are handled with x87 instructions (register class
   RC_ST0), float and double with SSE instructions (RC_FLOAT).
   Comparisons leave a VT_CMP value on the stack whose c.i is the
   condition to test; arithmetic leaves the result in the float
   register. */
1080 void gen_opf(int op)
1082 int a, ft, fc, swapped, r;
1083 int float_type =
1084 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;
1086 /* convert constants to memory references */
1087 if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
1088 vswap();
1089 gv(float_type);
1090 vswap();
1092 if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
1093 gv(float_type);
1095 /* must put at least one value in the floating point register */
1096 if ((vtop[-1].r & VT_LVAL) &&
1097 (vtop[0].r & VT_LVAL)) {
1098 vswap();
1099 gv(float_type);
1100 vswap();
1102 swapped = 0;
1103 /* swap the stack if needed so that t1 is the register and t2 is
1104 the memory reference */
1105 if (vtop[-1].r & VT_LVAL) {
1106 vswap();
1107 swapped = 1;
/* ---- long double: x87 path ---- */
1109 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1110 if (op >= TOK_ULT && op <= TOK_GT) {
1111 /* load on stack second operand */
1112 load(TREG_ST0, vtop);
1113 save_reg(TREG_RAX); /* eax is used by FP comparison code */
1114 if (op == TOK_GE || op == TOK_GT)
1115 swapped = !swapped;
1116 else if (op == TOK_EQ || op == TOK_NE)
1117 swapped = 0;
1118 if (swapped)
1119 o(0xc9d9); /* fxch %st(1) */
1120 o(0xe9da); /* fucompp */
1121 o(0xe0df); /* fnstsw %ax */
/* the status word bits now in %ah are reduced to a flag test; 'op'
   is replaced by the condition that has to be checked afterwards */
1122 if (op == TOK_EQ) {
1123 o(0x45e480); /* and $0x45, %ah */
1124 o(0x40fC80); /* cmp $0x40, %ah */
1125 } else if (op == TOK_NE) {
1126 o(0x45e480); /* and $0x45, %ah */
1127 o(0x40f480); /* xor $0x40, %ah */
/* op already equals TOK_NE in this branch; the assignment changes
   nothing */
1128 op = TOK_NE;
1129 } else if (op == TOK_GE || op == TOK_LE) {
1130 o(0x05c4f6); /* test $0x05, %ah */
1131 op = TOK_EQ;
1132 } else {
1133 o(0x45c4f6); /* test $0x45, %ah */
1134 op = TOK_EQ;
1136 vtop--;
1137 vtop->r = VT_CMP;
1138 vtop->c.i = op;
1139 } else {
1140 /* no memory reference possible for long double operations */
1141 load(TREG_ST0, vtop);
1142 swapped = !swapped;
/* 'a' is the x87 operation number; for '-' and '/' the next number is
   used when the operands are in swapped order */
1144 switch(op) {
1145 default:
1146 case '+':
1147 a = 0;
1148 break;
1149 case '-':
1150 a = 4;
1151 if (swapped)
1152 a++;
1153 break;
1154 case '*':
1155 a = 1;
1156 break;
1157 case '/':
1158 a = 6;
1159 if (swapped)
1160 a++;
1161 break;
/* ft and fc are assigned here but not read on this path */
1163 ft = vtop->type.t;
1164 fc = vtop->c.ul;
1165 o(0xde); /* fxxxp %st, %st(1) */
1166 o(0xc1 + (a << 3));
1167 vtop--;
/* ---- float / double: SSE path ---- */
1169 } else {
1170 if (op >= TOK_ULT && op <= TOK_GT) {
1171 /* if saved lvalue, then we must reload it */
1172 r = vtop->r;
1173 fc = vtop->c.ul;
1174 if ((r & VT_VALMASK) == VT_LLOCAL) {
1175 SValue v1;
1176 r = get_reg(RC_INT);
1177 v1.type.t = VT_INT;
1178 v1.r = VT_LOCAL | VT_LVAL;
1179 v1.c.ul = fc;
1180 load(r, &v1);
1181 fc = 0;
/* ordering comparisons are mapped onto two conditions by exchanging
   the operands where needed; 'op' becomes the setcc opcode byte */
1184 if (op == TOK_EQ || op == TOK_NE) {
1185 swapped = 0;
1186 } else {
1187 if (op == TOK_LE || op == TOK_LT)
1188 swapped = !swapped;
1189 if (op == TOK_LE || op == TOK_GE) {
1190 op = 0x93; /* setae */
1191 } else {
1192 op = 0x97; /* seta */
1196 if (swapped) {
/* bring the memory operand into xmm1 and compare register to register */
1197 o(0x7e0ff3); /* movq */
1198 gen_modrm(1, r, vtop->sym, fc);
1200 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1201 o(0x66);
1203 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */
1204 o(0xc8);
1205 } else {
1206 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) {
1207 o(0x66);
1209 o(0x2e0f); /* ucomisd */
1210 gen_modrm(0, r, vtop->sym, fc);
1213 vtop--;
1214 vtop->r = VT_CMP;
1215 vtop->c.i = op;
1216 } else {
1217 /* no memory reference possible for long double operations */
/* NOTE(review): this branch is only entered when the type is not
   VT_LDOUBLE, so the test below looks like it can never be true —
   confirm */
1218 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
1219 load(TREG_XMM0, vtop);
1220 swapped = !swapped;
/* 'a' is added to 0x58 further down to form the SSE opcode byte */
1222 switch(op) {
1223 default:
1224 case '+':
1225 a = 0;
1226 break;
1227 case '-':
1228 a = 4;
1229 break;
1230 case '*':
1231 a = 1;
1232 break;
1233 case '/':
1234 a = 6;
1235 break;
1237 ft = vtop->type.t;
1238 fc = vtop->c.ul;
1239 if ((ft & VT_BTYPE) == VT_LDOUBLE) {
1240 o(0xde); /* fxxxp %st, %st(1) */
1241 o(0xc1 + (a << 3));
1242 } else {
1243 /* if saved lvalue, then we must reload it */
1244 r = vtop->r;
1245 if ((r & VT_VALMASK) == VT_LLOCAL) {
1246 SValue v1;
1247 r = get_reg(RC_INT);
1248 v1.type.t = VT_INT;
1249 v1.r = VT_LOCAL | VT_LVAL;
1250 v1.c.ul = fc;
1251 load(r, &v1);
1252 fc = 0;
1254 if (swapped) {
/* operands are in the wrong order: copy the register operand to xmm1,
   load the other one into xmm0, then operate register to register */
1255 /* movq %xmm0,%xmm1 */
1256 o(0x7e0ff3);
1257 o(0xc8);
1258 load(TREG_XMM0, vtop);
1259 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */
/* prefix 0xf2 selects the double form, 0xf3 the float form */
1260 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1261 o(0xf2);
1262 } else {
1263 o(0xf3);
1265 o(0x0f);
1266 o(0x58 + a);
1267 o(0xc1);
1268 } else {
1269 if ((ft & VT_BTYPE) == VT_DOUBLE) {
1270 o(0xf2);
1271 } else {
1272 o(0xf3);
1274 o(0x0f);
1275 o(0x58 + a);
1276 gen_modrm(0, r, vtop->sym, fc);
1279 vtop--;
1284 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1285 and 'long long' cases. */
1286 void gen_cvt_itof(int t)
1288 if ((t & VT_BTYPE) == VT_LDOUBLE) {
1289 save_reg(TREG_ST0);
1290 gv(RC_INT);
1291 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1292 /* signed long long to float/double/long double (unsigned case
1293 is handled generically) */
1294 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1295 o(0x242cdf); /* fildll (%rsp) */
1296 o(0x08c48348); /* add $8, %rsp */
1297 } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1298 (VT_INT | VT_UNSIGNED)) {
1299 /* unsigned int to float/double/long double */
1300 o(0x6a); /* push $0 */
1301 g(0x00);
1302 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1303 o(0x242cdf); /* fildll (%rsp) */
1304 o(0x10c48348); /* add $16, %rsp */
1305 } else {
1306 /* int to float/double/long double */
1307 o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
1308 o(0x2404db); /* fildl (%rsp) */
1309 o(0x08c48348); /* add $8, %rsp */
1311 vtop->r = TREG_ST0;
1312 } else {
1313 save_reg(TREG_XMM0);
1314 gv(RC_INT);
1315 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT));
1316 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
1317 (VT_INT | VT_UNSIGNED) ||
1318 (vtop->type.t & VT_BTYPE) == VT_LLONG) {
1319 o(0x48); /* REX */
1321 o(0x2a0f);
1322 o(0xc0 + (vtop->r & VT_VALMASK)); /* cvtsi2sd */
1323 vtop->r = TREG_XMM0;
1327 /* convert from one floating point type to another */
1328 void gen_cvt_ftof(int t)
1330 int ft, bt, tbt;
1332 ft = vtop->type.t;
1333 bt = ft & VT_BTYPE;
1334 tbt = t & VT_BTYPE;
1336 if (bt == VT_FLOAT) {
1337 gv(RC_FLOAT);
1338 if (tbt == VT_DOUBLE) {
1339 o(0xc0140f); /* unpcklps */
1340 o(0xc05a0f); /* cvtps2pd */
1341 } else if (tbt == VT_LDOUBLE) {
1342 /* movss %xmm0,-0x10(%rsp) */
1343 o(0x44110ff3);
1344 o(0xf024);
1345 o(0xf02444d9); /* flds -0x10(%rsp) */
1346 vtop->r = TREG_ST0;
1348 } else if (bt == VT_DOUBLE) {
1349 gv(RC_FLOAT);
1350 if (tbt == VT_FLOAT) {
1351 o(0xc0140f66); /* unpcklpd */
1352 o(0xc05a0f66); /* cvtpd2ps */
1353 } else if (tbt == VT_LDOUBLE) {
1354 /* movsd %xmm0,-0x10(%rsp) */
1355 o(0x44110ff2);
1356 o(0xf024);
1357 o(0xf02444dd); /* fldl -0x10(%rsp) */
1358 vtop->r = TREG_ST0;
1360 } else {
1361 gv(RC_ST0);
1362 if (tbt == VT_DOUBLE) {
1363 o(0xf0245cdd); /* fstpl -0x10(%rsp) */
1364 /* movsd -0x10(%rsp),%xmm0 */
1365 o(0x44100ff2);
1366 o(0xf024);
1367 vtop->r = TREG_XMM0;
1368 } else if (tbt == VT_FLOAT) {
1369 o(0xf0245cd9); /* fstps -0x10(%rsp) */
1370 /* movss -0x10(%rsp),%xmm0 */
1371 o(0x44100ff3);
1372 o(0xf024);
1373 vtop->r = TREG_XMM0;
1378 /* convert fp to int 't' type */
1379 void gen_cvt_ftoi(int t)
1381 int ft, bt, size, r;
1382 ft = vtop->type.t;
1383 bt = ft & VT_BTYPE;
1384 if (bt == VT_LDOUBLE) {
1385 gen_cvt_ftof(VT_DOUBLE);
1386 bt = VT_DOUBLE;
1389 gv(RC_FLOAT);
1390 if (t != VT_INT)
1391 size = 8;
1392 else
1393 size = 4;
1395 r = get_reg(RC_INT);
1396 if (bt == VT_FLOAT) {
1397 o(0xf3);
1398 } else if (bt == VT_DOUBLE) {
1399 o(0xf2);
1400 } else {
1401 assert(0);
1403 if (size == 8) {
1404 o(0x48 + REX_BASE(r));
1406 o(0x2c0f); /* cvttss2si or cvttsd2si */
1407 o(0xc0 + (REG_VALUE(r) << 3));
1408 vtop->r = r;
1411 /* computed goto support */
1412 void ggoto(void)
1414 gcall_or_jmp(1);
1415 vtop--;
1418 /* end of x86-64 code generator */
1419 /*************************************************************/