out: exit if there is no room for more relocations or symbols
[neatcc.git] / x64.c
blob4a7272cf4dc08d6bd82fbd1c6f5dc38548560e0e
1 /* architecture-dependent code generation for x86_64 */
2 #include "tok.h"
3 #include "gen.h"
4 #include "out.h"
/* registers: encodings of the eight legacy general-purpose registers */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07
/* x86_64 registers: bit 3 set, so op_x() adds a REX extension bit for them */
#define R_R8		0x08
#define R_R9		0x09
#define R_R10		0x0a
#define R_R11		0x0b
#define R_R12		0x0c
#define R_R13		0x0d
#define R_R14		0x0e
#define R_R15		0x0f
/*
 * x86 opcodes: primary opcode bytes.  Where an opcode is shared by a
 * group of instructions, the caller passes the /digit extension as
 * the register field of op_rr().
 */
#define I_MOV		0x89	/* store a register to r/m (i_save) */
#define I_MOVI		0xc7	/* move imm32 to r/m (i_num, i_sym) */
#define I_MOVIR		0xb8	/* move imm to register; register is added to the opcode */
#define I_MOVR		0x8b	/* load a register from r/m */
#define I_MOVSXD	0x63	/* chosen by movrx_op() for signed 4-byte values */
#define I_SHX		0xd3	/* shift group; i_shl() passes /4, /5 or /7 */
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7	/* /2 in i_not(), /3 in i_neg() */
#define I_CALL		0xff	/* /2 in i_call_reg() */
#define I_MUL		0xf7	/* same byte as I_NOT; /4 in i_mul(), /6 or /7 in i_div() */
#define I_XOR		0x33
#define I_TEST		0x85	/* same value as I_TST */
#define I_CQO		0x99
#define I_PUSH		0x50	/* low three bits select the register */
#define I_POP		0x58	/* low three bits select the register */
/* smaller of a and b; both arguments may be evaluated twice */
#define MIN(a, b)	((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; a must be a power of two for the mask to work */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
/* register numbers scanned by the N_TMPS loops: rax, rdi, rsi, rdx, rcx, r8-r11, rbx, r12-r15 */
int tmpregs[] = {
	0, 7, 6, 2, 1,
	8, 9, 10, 11,
	3, 12, 13, 14, 15,
};
/* register numbers scanned by the N_ARGS loops: rdi, rsi, rdx, rcx, r8, r9 */
int argregs[] = {
	7, 6, 2, 1, 8, 9,
};
/* a two-byte opcode: bit 16 is the marker op_x() tests, o2 is emitted before o1 */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
/* the leading byte of an OP2() opcode */
#define O2(op)			(((op) >> 8) & 0xff)
/* the final (or only) opcode byte */
#define O1(op)			((op) & 0xff)
/* ModRM byte: mode in bits 7-6, r1 in bits 5-3, r2 in bits 2-0 */
#define MODRM(m, r1, r2)	(((m) << 6) | ((r1) << 3) | (r2))
/* REX prefix with W set; bit 3 of r1 becomes REX.R and bit 3 of r2 becomes REX.B */
#define REX(r1, r2)		(0x48 | (((r1) & 8) >> 1) | (((r2) & 8) >> 3))
/* store the l low-order bytes of n at s, least significant byte first */
static void putint(char *s, long n, int l)
{
	char *dst = s;

	for (; l != 0; l--, n >>= 8)
		*dst++ = n;
}
/*
 * Emit the prefixes and opcode byte(s) of an instruction.
 *
 * op: opcode, optionally built with OP2() for a two-byte opcode
 * r1: register whose bit 3 sets REX bit 2
 * r2: register whose bit 3 sets REX bit 0
 * bt: operand type; only its size, BT_SZ(bt), is examined
 *
 * The ModRM byte and any displacement or immediate are left to the caller.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int size = BT_SZ(bt);
	int opcode = O1(op);
	int rex = 0;

	if (size == 8)
		rex |= 8;
	if (r1 & 0x8)
		rex |= 4;
	if (r2 & 0x8)
		rex |= 1;
	if (size == 1) {
		rex |= 0x40;		/* byte operations always carry a REX prefix */
		opcode &= ~0x1;		/* and use the opcode with its low bit cleared */
	}
	if (size == 2)
		oi(0x66, 1);		/* prefix byte for 2-byte operands */
	if (rex)
		oi(rex | 0x40, 1);
	if (op & 0x10000)
		oi(O2(op), 1);
	oi(opcode, 1);
}
85 #define op_mr op_rm
87 /* op_*(): r=reg, m=mem, i=imm, s=sym */
88 static void op_rm(int op, int src, int base, int off, int bt)
90 int dis = off == (char) off ? 1 : 4;
91 int mod = dis == 4 ? 2 : 1;
92 if (!off && (base & 7) != R_RBP)
93 mod = 0;
94 op_x(op, src, base, bt);
95 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
96 if ((base & 7) == R_RSP)
97 oi(0x24, 1);
98 if (mod)
99 oi(off, dis);
/*
 * Emit op in register-direct form (ModRM mode 3): src goes in the reg
 * field and dst in the r/m field.  For opcode groups, callers pass
 * the /digit extension as src.
 */
static void op_rr(int op, int src, int dst, int bt)
{
	int reg = src & 0x07;
	int rm = dst & 0x07;

	op_x(op, src, dst, bt);
	oi(MODRM(3, reg, rm), 1);
}
108 #define movrx_bt(bt) (((bt) == 4) ? 4 : LONGSZ)
110 static int movrx_op(int bt, int mov)
112 int sz = BT_SZ(bt);
113 if (sz == 4)
114 return bt & BT_SIGNED ? I_MOVSXD : mov;
115 if (sz == 2)
116 return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7);
117 if (sz == 1)
118 return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6);
119 return mov;
122 static void mov_r2r(int rd, int r1, unsigned bt)
124 if (rd != r1 || BT_SZ(bt) != LONGSZ)
125 op_rr(movrx_op(bt, I_MOVR), rd, r1, movrx_bt(bt));
128 static void mov_m2r(int dst, int base, int off, int bt)
130 op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt));
/*
 * Return nonzero if imm can be used as an immediate operand of op:
 * never for the 0x20 (mul) group, otherwise only if it fits in a
 * signed byte.
 */
int i_imm(int op, long imm)
{
	return (op & 0xf0) != 0x20 && imm >= -128 && imm <= 127;
}
140 static void i_push(int reg)
142 op_x(I_PUSH | (reg & 0x7), 0, reg, 4);
145 static void i_pop(int reg)
147 op_x(I_POP | (reg & 0x7), 0, reg, 4);
150 void i_mov(int rd, int rn)
152 op_rr(movrx_op(LONGSZ, I_MOVR), rd, rn, movrx_bt(LONGSZ));
155 void i_load(int rd, int rn, int off, int bt)
157 mov_m2r(rd, rn, off, bt);
160 void i_save(int rd, int rn, int off, int bt)
162 op_rm(I_MOV, rd, rn, off, bt);
165 void i_reg(int op, int *rd, int *r1, int *r2, int *tmp)
167 *rd = 0;
168 *r1 = R_TMPS;
169 *r2 = op & O_IMM ? 0 : R_TMPS;
170 *tmp = 0;
171 if ((op & 0xf0) == 0x00) /* add */
172 return;
173 if ((op & 0xf0) == 0x10) { /* shl */
174 if (~op & O_IMM) {
175 *r2 = 1 << R_RCX;
176 *r1 = R_TMPS & ~*r2;
178 return;
180 if ((op & 0xf0) == 0x20) { /* mul */
181 *rd = (op & 0xff) == O_MOD ? (1 << R_RDX) : (1 << R_RAX);
182 *r1 = (1 << R_RAX);
183 *r2 = R_TMPS & ~*rd & ~*r1;
184 if ((op & 0xff) == O_DIV)
185 *r2 &= ~(1 << R_RDX);
186 *tmp = (1 << R_RDX) | (1 << R_RAX);
187 return;
189 if ((op & 0xf0) == 0x30) { /* cmp */
190 *rd = 1 << R_RAX;
191 return;
193 if ((op & 0xf0) == 0x40) { /* uop */
194 *r2 = 0;
195 if ((op & 0xff) == O_LNOT)
196 *r1 = 1 << R_RAX;
197 return;
199 if ((op & 0xf0) == 0x50) { /* etc */
200 if (op == O_MSET) {
201 *rd = 1 << R_RDI;
202 *r1 = 1 << R_RAX;
203 *r2 = 1 << R_RCX;
205 if (op == O_MCPY) {
206 *rd = 1 << R_RDI;
207 *r1 = 1 << R_RSI;
208 *r2 = 1 << R_RCX;
210 if (op == O_SX || op == O_ZX || op == O_MOV) {
211 *rd = R_TMPS;
212 *r2 = 0;
214 return;
218 static void i_add(int op, int rd, int r1, int r2)
220 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
221 static int rx[] = {0003, 0053, 0043, 0013, 0063};
222 op_rr(rx[op & 0x0f], rd, r2, LONGSZ);
/*
 * Emit rd = rd <op> n for the add group, with n encoded as a
 * one-byte immediate.  rn is not used.
 */
static void i_add_imm(int op, int rd, int rn, long n)
{
	/* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
	static int modrm[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
	unsigned char insn[4];

	insn[0] = REX(0, rd);
	insn[1] = 0x83;
	insn[2] = modrm[op & 0x0f] | (rd & 7);
	insn[3] = n & 0xff;
	os((void *) insn, 4);
}
233 void i_num(int rd, long n)
235 if (!n) {
236 op_rr(I_XOR, rd, rd, 4);
237 return;
239 if (n < 0 && -n <= 0xffffffff) {
240 op_rr(I_MOVI, 0, rd, LONGSZ);
241 oi(n, 4);
242 } else {
243 int len = 8;
244 if (n > 0 && n <= 0xffffffff)
245 len = 4;
246 op_x(I_MOVIR + (rd & 7), 0, rd, len);
247 oi(n, len);
251 static void i_mul(int rd, int r1, int r2)
253 if (r2 != R_RDX)
254 i_num(R_RDX, 0);
255 op_rr(I_MUL, 4, r2, LONGSZ);
258 static void i_div(int op, int rd, int r1, int r2)
260 if (r2 != R_RDX) {
261 if (op & O_SIGNED)
262 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
263 else
264 i_num(R_RDX, 0);
266 op_rr(I_MUL, op & O_SIGNED ? 7 : 6, r2, LONGSZ);
269 static void i_tst(int rn, int rm)
271 op_rr(I_TST, rn, rm, LONGSZ);
274 static void i_cmp(int rn, int rm)
276 op_rr(I_CMP, rn, rm, LONGSZ);
/* emit a compare of rn with n, encoded as a one-byte immediate */
static void i_cmp_imm(int rn, long n)
{
	unsigned char insn[4];

	insn[0] = REX(0, rn);
	insn[1] = 0x83;
	insn[2] = 0xf8 | rn;
	insn[3] = n & 0xff;
	os(insn, 4);
}
285 static void i_shl(int op, int rd, int r1, int rs)
287 int sm = 4;
288 if ((op & 0x0f) == 1)
289 sm = op & O_SIGNED ? 7 : 5;
290 op_rr(I_SHX, sm, rd, LONGSZ);
293 static void i_shl_imm(int op, int rd, int rn, long n)
295 int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0 ;
296 char s[4] = {REX(0, rn), 0xc1, sm | (rn & 7), n & 0xff};
297 os(s, 4);
300 void i_sym(int rd, char *sym, int off)
302 int sz = X64_ABS_RL & OUT_RL32 ? 4 : LONGSZ;
303 if (X64_ABS_RL & OUT_RLSX)
304 op_rr(I_MOVI, 0, rd, sz);
305 else
306 op_x(I_MOVIR + (rd & 7), 0, rd, sz);
307 if (!pass1)
308 out_rel(sym, OUT_CS | X64_ABS_RL, cslen);
309 oi(off, sz);
312 static void i_neg(int rd)
314 op_rr(I_NOT, 3, rd, LONGSZ);
317 static void i_not(int rd)
319 op_rr(I_NOT, 2, rd, LONGSZ);
/* for optimizing cmp + tst + jmp to cmp + jmp */

/* offset in cs of the sequence last emitted by i_set(), or -1 if there is none */
static long last_set = -1;

/* true if the 7 bytes emitted by i_set() are the last bytes in cs */
#define OPT_ISCMP()	(last_set >= 0 && last_set + 7 == cslen)
/* the condition byte of that sequence */
#define OPT_CCOND()	(cs[last_set + 1])
328 static void i_set(int op, int rd)
330 /* lt, gt, le, ge, eq, neq */
331 static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
332 static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
333 int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
334 char set[] = "\x0f\x00\xc0";
335 set[1] = cond;
336 last_set = cslen;
337 os(set, 3); /* setl al */
338 os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */
341 static void i_lnot(int rd)
343 if (OPT_ISCMP()) {
344 cs[last_set + 1] ^= 0x01;
345 } else {
346 char cmp[] = "\x00\x83\xf8\x00";
347 cmp[0] = REX(0, rd);
348 cmp[2] |= rd & 7;
349 os(cmp, 4); /* cmp rax, 0 */
350 i_set(O_EQ, rd);
/*
 * Emit a conditional jump with a zeroed displacement of nbytes bytes.
 * x is the second byte of the long (0x0f-prefixed) form; the short
 * form is built from its low four bits.
 */
static void jx(int x, int nbytes)
{
	char op[2] = {0x0f};
	int oplen = 2;

	if (nbytes == 1) {
		op[0] = 0x70 | (x & 0x0f);
		oplen = 1;
	} else {
		op[1] = x;
	}
	os(op, oplen);			/* jx $addr */
	oi(0, nbytes);
}
367 void i_jmp(int rn, int z, int nbytes)
369 if (!nbytes)
370 return;
371 if (nbytes > 1)
372 nbytes = 4;
373 if (rn >= 0) {
374 if (OPT_ISCMP()) {
375 int cond = OPT_CCOND();
376 cslen = last_set;
377 jx((!z ? cond : cond ^ 0x01) & ~0x10, nbytes);
378 last_set = -1;
379 } else {
380 i_tst(rn, rn);
381 jx(z ? 0x84 : 0x85, nbytes);
383 } else {
384 os(nbytes == 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */
385 oi(0, nbytes);
389 long i_fill(long src, long dst, int nbytes)
391 if (!nbytes)
392 return 0;
393 if (nbytes > 1)
394 nbytes = 4;
395 putint((void *) (cs + src - nbytes), dst - src, nbytes);
396 return dst - src;
399 static void i_zx(int rd, int r1, int bits)
401 if (bits & 0x07) {
402 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
403 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
404 } else {
405 mov_r2r(rd, r1, bits >> 3);
409 static void i_sx(int rd, int r1, int bits)
411 mov_r2r(rd, r1, BT_SIGNED | (bits >> 3));
414 void i_op(int op, int rd, int r1, int r2)
416 if ((op & 0xf0) == 0x00)
417 i_add(op, r1, r1, r2);
418 if ((op & 0xf0) == 0x10)
419 i_shl(op, r1, r1, r2);
420 if ((op & 0xf0) == 0x20) {
421 if ((op & 0xff) == O_MUL)
422 i_mul(R_RAX, r1, r2);
423 if ((op & 0xff) == O_DIV)
424 i_div(op, R_RAX, r1, r2);
425 if ((op & 0xff) == O_MOD)
426 i_div(op, R_RDX, r1, r2);
427 return;
429 if ((op & 0xf0) == 0x30) {
430 i_cmp(r1, r2);
431 i_set(op, rd);
432 return;
434 if ((op & 0xf0) == 0x40) { /* uop */
435 if ((op & 0xff) == O_NEG)
436 i_neg(r1);
437 if ((op & 0xff) == O_NOT)
438 i_not(r1);
439 if ((op & 0xff) == O_LNOT)
440 i_lnot(r1);
441 return;
445 static void i_add_anyimm(int rd, int rn, long n)
447 op_rm(I_LEA, rd, rn, n, LONGSZ);
450 void i_op_imm(int op, int rd, int r1, long n)
452 if ((op & 0xf0) == 0x00) { /* add */
453 if (rd == r1 && i_imm(O_ADD, n))
454 i_add_imm(op, rd, r1, n);
455 else
456 i_add_anyimm(rd, r1, n);
458 if ((op & 0xf0) == 0x10) /* shl */
459 i_shl_imm(op, rd, r1, n);
460 if ((op & 0xf0) == 0x20) /* mul */
461 die("mul/imm not implemented");
462 if ((op & 0xf0) == 0x30) { /* imm */
463 i_cmp_imm(r1, n);
464 i_set(op, rd);
466 if ((op & 0xf0) == 0x50) { /* etc */
467 if ((op & 0xff) == O_ZX)
468 i_zx(rd, r1, n);
469 if ((op & 0xff) == O_SX)
470 i_sx(rd, r1, n);
471 if ((op & 0xff) == O_MOV)
472 i_mov(rd, r1);
/*
 * Emit the fixed memory-copy sequence.  The arguments are not used:
 * i_reg() pins the O_MCPY operands to rdi, rsi and rcx.
 */
void i_memcpy(int r0, int r1, int r2)
{
	char *code = "\xfc\xf3\xa4";	/* cld; rep movs */

	os(code, 3);
}
/*
 * Emit the fixed memory-fill sequence.  The arguments are not used:
 * i_reg() pins the O_MSET operands to rdi, rax and rcx.
 */
void i_memset(int r0, int r1, int r2)
{
	char *code = "\xfc\xf3\xaa";	/* cld; rep stosb */

	os(code, 3);
}
486 void i_call_reg(int rd)
488 op_rr(I_CALL, 2, rd, LONGSZ);
491 void i_call(char *sym, int off)
493 os("\xe8", 1); /* call $x */
494 if (!pass1)
495 out_rel(sym, OUT_CS | OUT_RLREL, cslen);
496 oi(-4 + off, 4);
/* state of the function being generated; set by i_prolog() */
static int func_argc;		/* argc as passed to i_prolog() */
static int func_varg;		/* varg as passed to i_prolog() */
static int func_spsub;		/* nonzero if "sub rsp, $xxx" was emitted */
static int func_sargs;		/* mask of argument registers pushed on entry */
static int func_sregs;		/* mask of registers pushed after the frame setup */
static int func_initfp;		/* nonzero if rbp was set up */
static int spsub_addr;		/* offset in cs of the "sub rsp" immediate */
/*
 * Return the constant 16.  NOTE(review): presumably the offset from
 * the frame pointer to the first stack argument (saved rbp plus the
 * return address) -- confirm against the caller.
 */
int i_args(void)
{
	enum { ARGS_OFFSET = 16 };

	return ARGS_OFFSET;
}
512 int i_sp(void)
514 int i;
515 int n = 0;
516 for (i = 0; i < N_TMPS; i++)
517 if ((1 << tmpregs[i]) & func_sregs)
518 n += 8;
519 return -n;
522 static void i_saveargs(void)
524 int i;
525 os("\x58", 1); /* pop rax */
526 for (i = N_ARGS - 1; i >= 0; i--)
527 if ((1 << argregs[i]) & func_sargs)
528 i_push(argregs[i]);
529 os("\x50", 1); /* push rax */
532 void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int subsp)
534 int i;
535 last_set = -1;
536 func_argc = argc;
537 func_varg = varg;
538 func_sargs = sargs;
539 func_sregs = sregs;
540 func_initfp = initfp;
541 func_spsub = subsp;
542 if (func_sargs)
543 i_saveargs();
544 if (initfp) {
545 os("\x55", 1); /* push rbp */
546 os("\x48\x89\xe5", 3); /* mov rbp, rsp */
548 if (func_sregs) {
549 for (i = N_TMPS - 1; i >= 0; i--)
550 if ((1 << tmpregs[i]) & func_sregs)
551 i_push(tmpregs[i]);
553 if (func_spsub) {
554 os("\x48\x81\xec", 3); /* sub rsp, $xxx */
555 spsub_addr = cslen;
556 oi(0, 4);
560 void i_epilog(int sp_max)
562 int diff;
563 int nsregs = 0;
564 int nsargs = 0;
565 int i;
566 for (i = 0; i < N_TMPS; i++)
567 if ((1 << tmpregs[i]) & func_sregs)
568 nsregs++;
569 for (i = 0; i < N_ARGS; i++)
570 if ((1 << argregs[i]) & func_sargs)
571 nsargs++;
572 diff = ALIGN(-sp_max - nsregs * LONGSZ, 16);
573 /* forcing 16-byte alignment */
574 diff = (nsregs + nsargs) & 1 ? diff + LONGSZ : diff;
575 if (func_spsub && diff) {
576 i_add_anyimm(R_RSP, R_RBP, -nsregs * LONGSZ);
577 putint(cs + spsub_addr, diff, 4);
579 if (func_sregs) {
580 for (i = 0; i < N_TMPS; i++)
581 if ((1 << tmpregs[i]) & func_sregs)
582 i_pop(tmpregs[i]);
584 if (func_initfp)
585 os("\xc9", 1); /* leave */
586 if (func_sargs) {
587 os("\xc2", 1); /* ret n */
588 oi(nsargs * LONGSZ, 2);
589 } else {
590 os("\xc3", 1); /* ret */
594 void i_done(void)