out: exit if there is no room for more relocations or symbols
[neatcc.git] / x86.c
blob7eb1cf1d4036569d24e0d978481d3863dcf7a24c
1 /* architecture-dependent code generation for x86 */
2 #include "tok.h"
3 #include "gen.h"
4 #include "out.h"
/*
 * Register numbers.  These values are placed directly into ModRM
 * fields and added to opcode+reg instructions (see I_MOVIR, I_PUSH).
 */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07
/*
 * x86 opcodes.
 *
 * I_TST and I_TEST are two names for the same byte.  I_NOT and I_MUL
 * also share a byte (0xf7); the callers in this file pick the actual
 * operation through the ModRM reg field (2 = not, 3 = neg, 4 = mul,
 * 6 = div, 7 = idiv).
 */
#define I_MOV		0x89
#define I_MOVI		0xc7
#define I_MOVIR		0xb8
#define I_MOVR		0x8b
#define I_MOVSXD	0x63
#define I_SHX		0xd3
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7
#define I_CALL		0xff
#define I_MUL		0xf7
#define I_XOR		0x33
#define I_TEST		0x85
#define I_CQO		0x99
#define I_PUSH		0x50
#define I_POP		0x58
/* note: both macros evaluate their arguments more than once */
#define MIN(a, b)		((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; only correct when a is a power of two */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

/* register numbers scanned by i_sp(), i_prolog() and i_epilog(): eax, ecx, edx, esi, edi, ebx */
int tmpregs[] = {0, 1, 2, 6, 7, 3};
/* NOTE(review): nothing in this part of the file reads argregs - confirm its use in the generic code */
int argregs[] = {0};

/*
 * Opcode packing: OP2() builds a two-byte opcode and sets bit 16 so
 * that op_x() knows to emit the leading byte (o2) before the main
 * opcode byte (o1).  O2()/O1() extract those two bytes again.
 */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
#define O2(op)			(((op) >> 8) & 0xff)
#define O1(op)			((op) & 0xff)
/* ModRM byte: mode in bits 7-6, r1 in bits 5-3, r2 in bits 2-0 */
#define MODRM(m, r1, r2)	((m) << 6 | (r1) << 3 | (r2))
/*
 * Store the l least significant bytes of n at s, lowest byte first.
 *
 * The value is shifted as an unsigned quantity and stored through an
 * unsigned char pointer, so the result does not depend on how the
 * host compiler shifts negative numbers or on whether plain char is
 * signed.  A non-positive l writes nothing.
 */
static void putint(char *s, long n, int l)
{
	unsigned char *dst = (unsigned char *) s;
	unsigned long bits = (unsigned long) n;

	while (l-- > 0) {
		*dst++ = bits & 0xff;
		bits >>= 8;
	}
}
/*
 * Emit the prefix and opcode bytes of an instruction (no ModRM).
 *
 * op: opcode, optionally built with OP2() for two-byte opcodes
 * r1, r2: accepted but not used by this implementation
 * bt: operand type; only its size, BT_SZ(bt), is looked at
 *
 * A 2-byte operand gets the 0x66 operand-size prefix.  A 1-byte
 * operand clears bit 0 of the opcode byte.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int sz = BT_SZ(bt);
	if (sz == 2)
		oi(0x66, 1);
	if (op & 0x10000)
		oi(O2(op), 1);
	oi(sz == 1 ? O1(op) & ~0x1 : O1(op), 1);
}
64 #define op_mr op_rm
66 /* op_*(): r=reg, m=mem, i=imm, s=sym */
67 static void op_rm(int op, int src, int base, int off, int bt)
69 int dis = off == (char) off ? 1 : 4;
70 int mod = dis == 4 ? 2 : 1;
71 if (!off && (base & 7) != R_RBP)
72 mod = 0;
73 op_x(op, src, base, bt);
74 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
75 if ((base & 7) == R_RSP)
76 oi(0x24, 1);
77 if (mod)
78 oi(off, dis);
/*
 * Emit a register-to-register instruction: opcode bytes followed by a
 * ModRM byte with mode 3, src in the reg field and dst in the r/m field.
 */
static void op_rr(int op, int src, int dst, int bt)
{
	op_x(op, src, dst, bt);
	oi(MODRM(3, src & 0x07, dst & 0x07), 1);
}
87 #define movrx_bt(bt) (LONGSZ)
89 static int movrx_op(int bt, int mov)
91 int sz = BT_SZ(bt);
92 if (sz == 2)
93 return OP2(0x0f, bt & BT_SIGNED ? 0xbf : 0xb7);
94 if (sz == 1)
95 return OP2(0x0f, bt & BT_SIGNED ? 0xbe : 0xb6);
96 return mov;
99 static void mov_r2r(int rd, int r1, unsigned bt)
101 if (rd != r1 || BT_SZ(bt) != LONGSZ)
102 op_rr(movrx_op(bt, I_MOVR), rd, r1, movrx_bt(bt));
105 static void mov_m2r(int dst, int base, int off, int bt)
107 op_rm(movrx_op(bt, I_MOVR), dst, base, off, movrx_bt(bt));
/*
 * Report whether operation op can take imm as an immediate operand.
 *
 * Operations in the 0x20 group (mul/div/mod, see i_op()) never can;
 * every other operation accepts values in the range -128..127.
 * Returns nonzero when the immediate is acceptable.
 */
int i_imm(int op, long imm)
{
	if ((op & 0xf0) == 0x20)
		return 0;
	return imm <= 127 && imm >= -128;
}
117 static void i_push(int reg)
119 op_x(I_PUSH | (reg & 0x7), 0, reg, 4);
122 static void i_pop(int reg)
124 op_x(I_POP | (reg & 0x7), 0, reg, 4);
127 void i_mov(int rd, int rn)
129 op_rr(movrx_op(LONGSZ, I_MOVR), rd, rn, movrx_bt(LONGSZ));
/* load the value of type bt at off(rn) into rd */
void i_load(int rd, int rn, int off, int bt)
{
	mov_m2r(rd, rn, off, bt);
}
137 void i_save(int rd, int rn, int off, int bt)
139 op_rm(I_MOV, rd, rn, off, bt);
142 void i_reg(int op, int *rd, int *r1, int *r2, int *tmp)
144 *rd = 0;
145 *r1 = R_TMPS;
146 *r2 = op & O_IMM ? 0 : R_TMPS;
147 *tmp = 0;
148 if ((op & 0xf0) == 0x00) /* add */
149 return;
150 if ((op & 0xf0) == 0x10) { /* shl */
151 if (~op & O_IMM) {
152 *r2 = 1 << R_RCX;
153 *r1 = R_TMPS & ~*r2;
155 return;
157 if ((op & 0xf0) == 0x20) { /* mul */
158 *rd = (op & 0xff) == O_MOD ? (1 << R_RDX) : (1 << R_RAX);
159 *r1 = (1 << R_RAX);
160 *r2 = R_TMPS & ~*rd & ~*r1;
161 if ((op & 0xff) == O_DIV)
162 *r2 &= ~(1 << R_RDX);
163 *tmp = (1 << R_RDX) | (1 << R_RAX);
164 return;
166 if ((op & 0xf0) == 0x30) { /* cmp */
167 *rd = 1 << R_RAX;
168 return;
170 if ((op & 0xf0) == 0x40) { /* uop */
171 *r2 = 0;
172 if ((op & 0xff) == O_LNOT)
173 *r1 = 1 << R_RAX;
174 return;
176 if ((op & 0xf0) == 0x50) { /* etc */
177 if (op == O_MSET) {
178 *rd = 1 << R_RDI;
179 *r1 = 1 << R_RAX;
180 *r2 = 1 << R_RCX;
182 if (op == O_MCPY) {
183 *rd = 1 << R_RDI;
184 *r1 = 1 << R_RSI;
185 *r2 = 1 << R_RCX;
187 if (op == O_SX || op == O_ZX) {
188 *rd = R_TMPS;
189 *r1 = R_BYTE;
190 *r2 = 0;
192 if (op == O_MOV) {
193 *rd = R_TMPS;
194 *r2 = 0;
196 return;
200 static void i_add(int op, int rd, int r1, int r2)
202 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
203 static int rx[] = {0003, 0053, 0043, 0013, 0063};
204 op_rr(rx[op & 0x0f], rd, r2, LONGSZ);
/*
 * Emit "op rd, imm8" for the add group: opcode 0x83, a ModRM byte
 * taken from the table with rd or-ed in, and the low byte of n.
 * rn is not used.
 */
static void i_add_imm(int op, int rd, int rn, long n)
{
	/* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
	static int rx[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
	unsigned char s[3] = {0x83, rx[op & 0x0f] | rd, n & 0xff};
	os((void *) s, 3);
}
215 void i_num(int rd, long n)
217 if (!n) {
218 op_rr(I_XOR, rd, rd, 4);
219 return;
220 } else {
221 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
222 oi(n, LONGSZ);
226 static void i_mul(int rd, int r1, int r2)
228 if (r2 != R_RDX)
229 i_num(R_RDX, 0);
230 op_rr(I_MUL, 4, r2, LONGSZ);
233 static void i_div(int op, int rd, int r1, int r2)
235 if (r2 != R_RDX) {
236 if (op & O_SIGNED)
237 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
238 else
239 i_num(R_RDX, 0);
241 op_rr(I_MUL, op & O_SIGNED ? 7 : 6, r2, LONGSZ);
244 static void i_tst(int rn, int rm)
246 op_rr(I_TST, rn, rm, LONGSZ);
249 static void i_cmp(int rn, int rm)
251 op_rr(I_CMP, rn, rm, LONGSZ);
/* compare rn with an 8-bit immediate: 0x83, 0xf8 | rn, low byte of n */
static void i_cmp_imm(int rn, long n)
{
	unsigned char s[3] = {0x83, 0xf8 | rn, n & 0xff};
	os(s, 3);
}
260 static void i_shl(int op, int rd, int r1, int rs)
262 int sm = 4;
263 if ((op & 0x0f) == 1)
264 sm = op & O_SIGNED ? 7 : 5;
265 op_rr(I_SHX, sm, rd, LONGSZ);
268 static void i_shl_imm(int op, int rd, int rn, long n)
270 int sm = (op & 0x1) ? (op & O_SIGNED ? 0xf8 : 0xe8) : 0xe0;
271 char s[3] = {0xc1, sm | rn, n & 0xff};
272 os(s, 3);
275 void i_sym(int rd, char *sym, int off)
277 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
278 if (!pass1)
279 out_rel(sym, OUT_CS, cslen);
280 oi(off, LONGSZ);
283 static void i_neg(int rd)
285 op_rr(I_NOT, 3, rd, LONGSZ);
288 static void i_not(int rd)
290 op_rr(I_NOT, 2, rd, LONGSZ);
/*
 * for optimizing cmp + tst + jmp to cmp + jmp
 *
 * OPT_ISCMP() is true when the six bytes written by i_set() are the
 * last thing in the code buffer; OPT_CCOND() reads the condition byte
 * of that sequence.
 */
#define OPT_ISCMP()		(last_set >= 0 && last_set + 6 == cslen)
#define OPT_CCOND()		(cs[last_set + 1])

/* code offset of the most recent i_set() sequence, or -1 if none */
static long last_set = -1;
299 static void i_set(int op, int rd)
301 /* lt, gt, le, ge, eq, neq */
302 static int ucond[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
303 static int scond[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
304 int cond = op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
305 char set[] = "\x0f\x00\xc0";
306 set[1] = cond;
307 last_set = cslen;
308 os(set, 3); /* setl al */
309 os("\x0f\xb6\xc0", 3); /* movzx rax, al */
312 static void i_lnot(int rd)
314 if (OPT_ISCMP()) {
315 cs[last_set + 1] ^= 0x01;
316 } else {
317 char cmp[] = "\x83\xf8\x00";
318 cmp[1] |= rd;
319 os(cmp, 3); /* cmp eax, 0 */
320 i_set(O_EQ, rd);
/*
 * Emit a conditional jump with a zeroed displacement of nbytes bytes.
 *
 * x is the second byte of the long (0x0f-prefixed) form.  For a
 * one-byte displacement, the single opcode byte 0x70 | (x & 0x0f) is
 * used instead.
 */
static void jx(int x, int nbytes)
{
	char op[2] = {0x0f};
	if (nbytes == 1) {
		op[0] = 0x70 | (x & 0x0f);
		os(op, 1);		/* jx $addr */
	} else {
		op[1] = x;
		os(op, 2);		/* jx $addr */
	}
	oi(0, nbytes);
}
337 void i_jmp(int rn, int z, int nbytes)
339 if (!nbytes)
340 return;
341 if (nbytes > 1)
342 nbytes = 4;
343 if (rn >= 0) {
344 if (OPT_ISCMP()) {
345 int cond = OPT_CCOND();
346 cslen = last_set;
347 jx((!z ? cond : cond ^ 0x01) & ~0x10, nbytes);
348 last_set = -1;
349 } else {
350 i_tst(rn, rn);
351 jx(z ? 0x84 : 0x85, nbytes);
353 } else {
354 os(nbytes == 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */
355 oi(0, nbytes);
359 long i_fill(long src, long dst, int nbytes)
361 if (!nbytes)
362 return 0;
363 if (nbytes > 1)
364 nbytes = 4;
365 putint((void *) (cs + src - nbytes), dst - src, nbytes);
366 return dst - src;
369 static void i_zx(int rd, int r1, int bits)
371 if (bits & 0x07) {
372 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
373 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
374 } else {
375 mov_r2r(rd, r1, bits >> 3);
379 static void i_sx(int rd, int r1, int bits)
381 mov_r2r(rd, r1, BT_SIGNED | (bits >> 3));
384 void i_op(int op, int rd, int r1, int r2)
386 if ((op & 0xf0) == 0x00)
387 i_add(op, r1, r1, r2);
388 if ((op & 0xf0) == 0x10)
389 i_shl(op, r1, r1, r2);
390 if ((op & 0xf0) == 0x20) {
391 if ((op & 0xff) == O_MUL)
392 i_mul(R_RAX, r1, r2);
393 if ((op & 0xff) == O_DIV)
394 i_div(op, R_RAX, r1, r2);
395 if ((op & 0xff) == O_MOD)
396 i_div(op, R_RDX, r1, r2);
397 return;
399 if ((op & 0xf0) == 0x30) {
400 i_cmp(r1, r2);
401 i_set(op, rd);
402 return;
404 if ((op & 0xf0) == 0x40) { /* uop */
405 if ((op & 0xff) == O_NEG)
406 i_neg(r1);
407 if ((op & 0xff) == O_NOT)
408 i_not(r1);
409 if ((op & 0xff) == O_LNOT)
410 i_lnot(r1);
411 return;
415 static void i_add_anyimm(int rd, int rn, long n)
417 op_rm(I_LEA, rd, rn, n, LONGSZ);
420 void i_op_imm(int op, int rd, int r1, long n)
422 if ((op & 0xf0) == 0x00) { /* add */
423 if (rd == r1 && i_imm(O_ADD, n))
424 i_add_imm(op, rd, r1, n);
425 else
426 i_add_anyimm(rd, r1, n);
428 if ((op & 0xf0) == 0x10) /* shl */
429 i_shl_imm(op, rd, r1, n);
430 if ((op & 0xf0) == 0x20) /* mul */
431 die("mul/imm not implemented");
432 if ((op & 0xf0) == 0x30) { /* imm */
433 i_cmp_imm(r1, n);
434 i_set(op, rd);
436 if ((op & 0xf0) == 0x50) { /* etc */
437 if ((op & 0xff) == O_ZX)
438 i_zx(rd, r1, n);
439 if ((op & 0xff) == O_SX)
440 i_sx(rd, r1, n);
441 if ((op & 0xff) == O_MOV)
442 i_mov(rd, r1);
/*
 * Emit a byte-wise block copy.  The registers are implicit in the
 * instruction (i_reg() pins them to edi, esi and ecx for O_MCPY), so
 * r0, r1 and r2 are not used.
 */
void i_memcpy(int r0, int r1, int r2)
{
	os("\xfc\xf3\xa4", 3);		/* cld; rep movs */
}
/*
 * Emit a byte-wise block fill.  The registers are implicit in the
 * instruction (i_reg() pins them to edi, eax and ecx for O_MSET), so
 * r0, r1 and r2 are not used.
 */
void i_memset(int r0, int r1, int r2)
{
	os("\xfc\xf3\xaa", 3);		/* cld; rep stosb */
}
456 void i_call_reg(int rd)
458 op_rr(I_CALL, 2, rd, LONGSZ);
461 void i_call(char *sym, int off)
463 os("\xe8", 1); /* call $x */
464 if (!pass1)
465 out_rel(sym, OUT_CS | OUT_RLREL, cslen);
466 oi(-4 + off, 4);
/* state of the function being generated; the func_* values are set by i_prolog() */
static int func_argc;		/* argc as passed to i_prolog() */
static int func_varg;		/* varg as passed to i_prolog() */
static int func_spsub;		/* nonzero if the prologue reserves stack space */
static int func_sargs;		/* sargs as passed to i_prolog() */
static int func_sregs;		/* bitmask of registers saved by the prologue */
static int func_initfp;		/* nonzero if the prologue sets up the frame pointer */
static int spsub_addr;		/* code offset of the stack size operand patched by i_epilog() */
477 int i_args(void)
479 return LONGSZ << 1;
482 int i_sp(void)
484 int i;
485 int n = 0;
486 for (i = 0; i < N_TMPS; i++)
487 if ((1 << tmpregs[i]) & func_sregs)
488 n += LONGSZ;
489 return -n;
492 void i_prolog(int argc, int varg, int sargs, int sregs, int initfp, int subsp)
494 int i;
495 last_set = -1;
496 func_argc = argc;
497 func_varg = varg;
498 func_sargs = sargs;
499 func_sregs = sregs;
500 func_initfp = initfp;
501 func_spsub = subsp;
502 if (initfp) {
503 os("\x55", 1); /* push rbp */
504 os("\x89\xe5", 2); /* mov rbp, rsp */
506 if (func_sregs) {
507 for (i = N_TMPS - 1; i >= 0; i--)
508 if ((1 << tmpregs[i]) & func_sregs)
509 i_push(tmpregs[i]);
511 if (func_spsub) {
512 os("\x81\xec", 2); /* sub rsp, $xxx */
513 spsub_addr = cslen;
514 oi(0, 4);
518 void i_epilog(int sp_max)
520 int diff;
521 int nsregs = 0;
522 int i;
523 for (i = 0; i < N_TMPS; i++)
524 if ((1 << tmpregs[i]) & func_sregs)
525 nsregs++;
526 diff = ALIGN(-sp_max - nsregs * LONGSZ, 16);
527 /* forcing 16-byte alignment */
528 diff = nsregs & 1 ? diff + LONGSZ : diff;
529 if (func_spsub && diff) {
530 i_add_anyimm(R_RSP, R_RBP, -nsregs * LONGSZ);
531 putint(cs + spsub_addr, diff, 4);
533 if (func_sregs) {
534 for (i = 0; i < N_TMPS; i++)
535 if ((1 << tmpregs[i]) & func_sregs)
536 i_pop(tmpregs[i]);
538 if (func_initfp)
539 os("\xc9", 1); /* leave */
540 os("\xc3", 1); /* ret */
543 void i_done(void)