arm: fixed minor typo
[neatcc.git] / x86.c
blob614f0ae473829732cf004a0641f545d1081aad08
1 /* architecture-dependent code generation for x86 */
2 #include <stdlib.h>
3 #include "ncc.h"
/* x86-64 register numbers; r8-r15 are not used */
#define R_RAX		0x00
#define R_RCX		0x01
#define R_RDX		0x02
#define R_RBX		0x03
#define R_RSP		0x04
#define R_RBP		0x05
#define R_RSI		0x06
#define R_RDI		0x07

/* register used for O_RET operands and O_CALL results */
#define REG_RET		R_RAX
/* register mask offered for one-byte operands (bits 0-2: rax, rcx, rdx) */
#define R_BYTE		0x0007
/* x86 opcode bytes used by the emitters below */
#define I_MOV		0x89	/* store: register to r/m */
#define I_MOVI		0xc7
#define I_MOVIR		0xb8	/* load immediate; register number is added */
#define I_MOVR		0x8b	/* load: r/m to register */
#define I_MOVSXD	0x63
#define I_SHX		0xd3	/* shifts; the reg field selects the kind */
#define I_CMP		0x3b
#define I_TST		0x85
#define I_LEA		0x8d
#define I_NOT		0xf7	/* reg field 2 = not, 3 = neg */
#define I_CALL		0xff	/* reg field 2 = indirect call */
#define I_MUL		0xf7	/* reg field 4 = mul, 6 = div, 7 = idiv */
#define I_XOR		0x33
#define I_CQO		0x99
#define I_PUSH		0x50	/* register number is or-ed in */
#define I_POP		0x58	/* register number is or-ed in */
/* the smaller of a and b; arguments may be evaluated twice */
#define MIN(a, b)		((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; a must be a power of two */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

/* register numbers: rax, rcx, rdx, rsi, rdi, rbx */
int tmpregs[] = {0, 1, 2, 6, 7, 3};
int argregs[] = {0};

/* two-byte opcode: bit 16 marks the presence of the leading byte o2 */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
/* the leading byte of a two-byte opcode */
#define O2(op)			(((op) >> 8) & 0xff)
/* the final opcode byte */
#define O1(op)			((op) & 0xff)
/* ModRM byte: mode in bits 6-7, r1 in bits 3-5, r2 in bits 0-2 */
#define MODRM(m, r1, r2)	(((m) << 6) | ((r1) << 3) | (r2))
47 static struct mem cs; /* generated code */
49 /* code generation functions */
50 static void os(void *s, int n)
52 mem_put(&cs, s, n);
/*
 * Store the l least significant bytes of n, lowest byte first, into a
 * static buffer and return it.  The buffer is shared between calls, so
 * the result is valid only until the next call.
 */
static char *ointbuf(long n, int l)
{
	static char buf[16];
	int k = 0;
	while (k < l) {
		buf[k++] = n & 0xff;
		n >>= 8;
	}
	return buf;
}
66 static void oi(long n, int l)
68 mem_put(&cs, ointbuf(n, l), l);
71 static void oi_at(long pos, long n, int l)
73 mem_cpy(&cs, pos, ointbuf(n, l), l);
76 static long opos(void)
78 return mem_len(&cs);
/*
 * Emit the prefix and opcode bytes of an instruction for operand type bt.
 * r1 and r2 are accepted for symmetry with the other op_*() helpers but
 * are not used here.
 */
static void op_x(int op, int r1, int r2, int bt)
{
	int size = T_SZ(bt);
	int last = O1(op);
	if (size == 2)
		oi(0x66, 1);		/* 16-bit operand prefix */
	if (op & 0x10000)
		oi(O2(op), 1);		/* leading byte of an OP2() opcode */
	if (size == 1)
		last &= ~0x1;		/* byte operands clear the lowest opcode bit */
	oi(last, 1);
}
91 #define op_mr op_rm
93 /* op_*(): r=reg, m=mem, i=imm, s=sym */
94 static void op_rm(int op, int src, int base, int off, int bt)
96 int dis = off == (char) off ? 1 : 4;
97 int mod = dis == 4 ? 2 : 1;
98 if (!off && (base & 7) != R_RBP)
99 mod = 0;
100 op_x(op, src, base, bt);
101 oi(MODRM(mod, src & 0x07, base & 0x07), 1);
102 if ((base & 7) == R_RSP)
103 oi(0x24, 1);
104 if (mod)
105 oi(off, dis);
/* emit a register-to-register instruction: opcode bytes, then ModRM with mode 3 */
static void op_rr(int op, int src, int dst, int bt)
{
	int modrm = MODRM(3, src & 0x07, dst & 0x07);
	op_x(op, src, dst, bt);
	oi(modrm, 1);
}
114 #define movrx_bt(bt) (LONGSZ)
116 static int movrx_op(int bt, int mov)
118 int sz = T_SZ(bt);
119 if (sz == 2)
120 return OP2(0x0f, bt & T_MSIGN ? 0xbf : 0xb7);
121 if (sz == 1)
122 return OP2(0x0f, bt & T_MSIGN ? 0xbe : 0xb6);
123 return mov;
126 static void mov_r2r(int rd, int r1, unsigned bt)
128 if (rd != r1 || T_SZ(bt) != LONGSZ)
129 op_rr(movrx_op(bt, I_MOVR), rd, r1, movrx_bt(bt));
132 static void i_push(int reg)
134 op_x(I_PUSH | (reg & 0x7), 0, reg, LONGSZ);
137 static void i_pop(int reg)
139 op_x(I_POP | (reg & 0x7), 0, reg, LONGSZ);
142 static void i_mov(int rd, int rn)
144 op_rr(movrx_op(LONGSZ, I_MOVR), rd, rn, movrx_bt(LONGSZ));
147 static void i_add(int op, int rd, int r1, int r2)
149 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
150 static int rx[] = {0003, 0053, 0043, 0013, 0063};
151 op_rr(rx[op & 0x0f], rd, r2, LONGSZ);
/*
 * Emit the 0x83 form of an arithmetic instruction with a one-byte
 * immediate: the low byte of n is applied to register rd.  rn is not used.
 */
static void i_add_imm(int op, int rd, int rn, long n)
{
	/* ModRM bases for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
	static int modrm[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
	unsigned char ins[3];
	ins[0] = 0x83;
	ins[1] = modrm[op & 0x0f] | rd;
	ins[2] = n & 0xff;
	os((void *) ins, 3);
}
162 static void i_num(int rd, long n)
164 if (!n) {
165 op_rr(I_XOR, rd, rd, 4);
166 return;
167 } else {
168 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
169 oi(n, LONGSZ);
173 static void i_mul(int rd, int r1, int r2)
175 if (r2 != R_RDX)
176 i_num(R_RDX, 0);
177 op_rr(I_MUL, 4, r2, LONGSZ);
180 static void i_div(int op, int rd, int r1, int r2)
182 long bt = O_T(op);
183 if (r2 != R_RDX) {
184 if (bt & T_MSIGN)
185 op_x(I_CQO, R_RAX, R_RDX, LONGSZ);
186 else
187 i_num(R_RDX, 0);
189 op_rr(I_MUL, bt & T_MSIGN ? 7 : 6, r2, LONGSZ);
192 static void i_tst(int rn, int rm)
194 op_rr(I_TST, rn, rm, LONGSZ);
197 static void i_cmp(int rn, int rm)
199 op_rr(I_CMP, rn, rm, LONGSZ);
/* emit a compare of register rn with the low byte of n (0x83 form) */
static void i_cmp_imm(int rn, long n)
{
	unsigned char ins[3];
	ins[0] = 0x83;
	ins[1] = 0xf8 | rn;
	ins[2] = n & 0xff;
	os(ins, 3);
}
208 static void i_shl(int op, int rd, int r1, int rs)
210 long bt = O_T(op);
211 int sm = 4;
212 if ((op & 0x0f) == 1)
213 sm = bt & T_MSIGN ? 7 : 5;
214 op_rr(I_SHX, sm, rd, LONGSZ);
217 static void i_shl_imm(int op, int rd, int rn, long n)
219 long bt = O_T(op);
220 int sm = (op & 0x1) ? (bt & T_MSIGN ? 0xf8 : 0xe8) : 0xe0;
221 char s[4] = {0xc1, sm | rn, n & 0xff};
222 os(s, 3);
225 static void i_neg(int rd)
227 op_rr(I_NOT, 3, rd, LONGSZ);
230 static void i_not(int rd)
232 op_rr(I_NOT, 2, rd, LONGSZ);
235 static int i_cond(long op)
237 /* lt, ge, eq, ne, le, gt */
238 static int ucond[] = {0x92, 0x93, 0x94, 0x95, 0x96, 0x97};
239 static int scond[] = {0x9c, 0x9d, 0x94, 0x95, 0x9e, 0x9f};
240 long bt = O_T(op);
241 return bt & T_MSIGN ? scond[op & 0x0f] : ucond[op & 0x0f];
/*
 * Materialise the result of comparison op as 0 or 1.  The emitted bytes
 * always address al/rax; rd is not used.
 */
static void i_set(long op, int rd)
{
	char setcc[] = "\x0f\x00\xc0";
	setcc[1] = i_cond(op);		/* fill in the condition byte */
	os(setcc, 3);			/* setl al */
	os("\x0f\xb6\xc0", 3);		/* movzx rax, al */
}
252 static void i_lnot(int rd)
254 char cmp[] = "\x83\xf8\x00";
255 cmp[1] |= rd;
256 os(cmp, 3); /* cmp rax, 0 */
257 i_set(O_EQ, rd);
/*
 * Emit the opcode of a conditional jump.  For a one-byte displacement
 * this is the single byte 0x70 | (x & 0x0f); otherwise it is 0x0f
 * followed by x.  The displacement itself is emitted by the caller.
 */
static void jx(int x, int nbytes)
{
	char op[2] = {0x0f};
	if (nbytes == 1) {
		oi(0x70 | (x & 0x0f), 1);	/* jx $addr */
		return;
	}
	op[1] = x;
	os(op, 2);				/* jx $addr */
}
271 /* generate cmp or tst before a conditional jump */
272 static void i_jcmp(long op, long rn, long rm)
274 if (op & O_JZ)
275 i_tst(rn, rn);
276 if (op & O_JCC) {
277 if (op & O_NUM)
278 i_cmp_imm(rn, rm);
279 else
280 i_cmp(rn, rm);
284 /* generate a jump instruction and return the of its displacement */
285 static long i_jmp(long op, int nb)
287 if (op & O_JZ)
288 jx(O_C(op) == O_JZ ? 0x84 : 0x85, nb);
289 else if (op & O_JCC)
290 jx(i_cond(op) & ~0x10, nb);
291 else
292 os(nb == 1 ? "\xeb" : "\xe9", 1);
293 oi(0, nb);
294 return opos() - nb;
297 /* the length of a jump instruction opcode */
298 static int i_jlen(long op, int nb)
300 if (op & (O_JZ | O_JCC))
301 return nb ? 2 : 1;
302 return 1;
305 /* zero extend */
306 static void i_zx(int rd, int r1, int bits)
308 if (bits & 0x07) {
309 i_shl_imm(O_SHL, rd, rd, LONGSZ * 8 - bits);
310 i_shl_imm(O_SHR, rd, rd, LONGSZ * 8 - bits);
311 } else {
312 mov_r2r(rd, r1, bits >> 3);
316 /* sign extend */
317 static void i_sx(int rd, int r1, int bits)
319 mov_r2r(rd, r1, T_MSIGN | (bits >> 3));
322 static void i_cast(int rd, int rn, int bt)
324 if (T_SZ(bt) == 8) {
325 if (rd != rn)
326 i_mov(rd, rn);
327 } else {
328 if (bt & T_MSIGN)
329 i_sx(rd, rn, T_SZ(bt) * 8);
330 else
331 i_zx(rd, rn, T_SZ(bt) * 8);
335 static void i_add_anyimm(int rd, int rn, long n)
337 op_rm(I_LEA, rd, rn, n, LONGSZ);
/* relocations recorded by i_rel(); the three arrays grow together */
static long *rel_sym;		/* relocation symbols */
static long *rel_flg;		/* relocation flags */
static long *rel_off;		/* relocation offsets */
static long rel_n;		/* number of relocations recorded */
static long rel_sz;		/* allocated entries in rel_*[] */

/* labels; slot 0 is the return label */
static long lab_sz;		/* allocated entries in lab_loc[] */
static long *lab_loc;		/* label offsets in cs */

/* jumps recorded by jmp_add(); the three arrays grow together */
static long jmp_n;		/* number of jumps recorded */
static long jmp_sz;		/* allocated entries in jmp_*[] */
static long *jmp_off;		/* jump offsets */
static long *jmp_dst;		/* jump destinations */
static long *jmp_op;		/* jump opcode */
static long jmp_ret;		/* the position of the last return jmp */
353 static void lab_add(long id)
355 while (id >= lab_sz) {
356 int lab_n = lab_sz;
357 lab_sz = MAX(128, lab_sz * 2);
358 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
360 lab_loc[id] = opos();
363 static void jmp_add(long op, long off, long dst)
365 if (jmp_n == jmp_sz) {
366 jmp_sz = MAX(128, jmp_sz * 2);
367 jmp_off = mextend(jmp_off, jmp_n, jmp_sz, sizeof(*jmp_off));
368 jmp_dst = mextend(jmp_dst, jmp_n, jmp_sz, sizeof(*jmp_dst));
369 jmp_op = mextend(jmp_op, jmp_n, jmp_sz, sizeof(*jmp_op));
371 jmp_off[jmp_n] = off;
372 jmp_dst[jmp_n] = dst;
373 jmp_op[jmp_n] = op;
374 jmp_n++;
/*
 * Define label id at the current code position.  Labels are stored one
 * slot up because slot 0 holds the return label.
 */
void i_label(long id)
{
	long slot = id + 1;
	lab_add(slot);
}
382 static void i_rel(long sym, long flg, long off)
384 if (rel_n == rel_sz) {
385 rel_sz = MAX(128, rel_sz * 2);
386 rel_sym = mextend(rel_sym, rel_n, rel_sz, sizeof(*rel_sym));
387 rel_flg = mextend(rel_flg, rel_n, rel_sz, sizeof(*rel_flg));
388 rel_off = mextend(rel_off, rel_n, rel_sz, sizeof(*rel_off));
390 rel_sym[rel_n] = sym;
391 rel_flg[rel_n] = flg;
392 rel_off[rel_n] = off;
393 rel_n++;
396 static void i_sym(int rd, int sym, int off)
398 op_x(I_MOVIR + (rd & 7), 0, rd, LONGSZ);
399 i_rel(sym, OUT_CS, opos());
400 oi(off, LONGSZ);
/*
 * Emit a subtraction of val from the stack pointer, using the one-byte
 * immediate form when val fits in -128..127 and the four-byte form
 * otherwise.  Nothing is emitted for zero.
 */
static void i_subsp(long val)
{
	int wide;
	if (!val)
		return;
	wide = val > 127 || val < -128;
	os(wide ? "\x81\xec" : "\x83\xec", 2);
	oi(val, wide ? 4 : 1);
}
416 static int regs_count(long regs)
418 int cnt = 0;
419 int i;
420 for (i = 0; i < N_REGS; i++)
421 if (((1 << i) & R_TMPS) & regs)
422 cnt++;
423 return cnt;
426 static void regs_save(long sregs, long dis)
428 int i;
429 for (i = 0; i < N_REGS; i++)
430 if (((1 << i) & R_TMPS) & sregs)
431 i_push(i);
432 if (dis)
433 i_subsp(dis);
436 static void regs_load(long sregs, long dis)
438 int i;
439 if (dis)
440 i_subsp(-dis);
441 for (i = N_REGS - 1; i >= 0; --i)
442 if (((1 << i) & R_TMPS) & sregs)
443 i_pop(i);
446 void i_wrap(int argc, long sargs, long spsub, int initfp, long sregs, long sregs_pos)
448 long body_n;
449 void *body;
450 long diff; /* prologue length */
451 int i;
452 /* removing the last jmp to the epilogue */
453 if (jmp_ret + i_jlen(O_JMP, 4) + 4 == opos()) {
454 mem_cut(&cs, jmp_ret);
455 jmp_n--;
457 lab_add(0); /* the return label */
458 body_n = mem_len(&cs);
459 body = mem_get(&cs);
460 /* generating function prologue */
461 if (initfp) {
462 os("\x55", 1); /* push rbp */
463 os("\x89\xe5", 2); /* mov rbp, rsp */
465 if (spsub) {
466 spsub = ALIGN(spsub, 8);
467 i_subsp(sregs ? -sregs_pos - regs_count(sregs) * ULNG : spsub);
469 if (sregs) /* saving registers */
470 regs_save(sregs, spsub + sregs_pos);
471 diff = mem_len(&cs);
472 mem_put(&cs, body, body_n);
473 free(body);
474 /* generating function epilogue */
475 if (sregs) /* restoring saved registers */
476 regs_load(sregs, spsub + sregs_pos);
477 if (initfp)
478 os("\xc9", 1); /* leave */
479 os("\xc3", 1); /* ret */
480 /* adjusting code offsets */
481 for (i = 0; i < rel_n; i++)
482 rel_off[i] += diff;
483 for (i = 0; i < jmp_n; i++)
484 jmp_off[i] += diff;
485 for (i = 0; i < lab_sz; i++)
486 lab_loc[i] += diff;
489 /* introduce shorter jumps, if possible */
490 static void i_shortjumps(int *nb)
492 long off = 0; /* current code offset */
493 long dif = 0; /* the difference after changing jump instructions */
494 int rel = 0; /* current relocation */
495 int lab = 1; /* current label */
496 long c_len = mem_len(&cs);
497 char *c = mem_get(&cs);
498 int i;
499 for (i = 0; i < jmp_n; i++)
500 nb[i] = abs(lab_loc[jmp_dst[i]] - jmp_off[i]) < 0x70 ? 1 : 4;
501 for (i = 0; i < jmp_n; i++) {
502 long cur = jmp_off[i] - i_jlen(jmp_op[i], 4);
503 while (rel < rel_n && rel_off[rel] <= cur)
504 rel_off[rel++] += dif;
505 while (lab < lab_sz && lab_loc[lab] <= cur)
506 lab_loc[lab++] += dif;
507 mem_put(&cs, c + off, cur - off);
508 jmp_off[i] = i_jmp(jmp_op[i], nb[i]);
509 off = cur + i_jlen(jmp_op[i], 4) + 4;
510 dif = mem_len(&cs) - off;
512 while (rel < rel_n)
513 rel_off[rel++] += dif;
514 while (lab < lab_sz)
515 lab_loc[lab++] += dif;
516 lab_loc[0] += dif;
517 mem_put(&cs, c + off, c_len - off);
518 free(c);
521 void i_code(char **c, long *c_len, long **rsym, long **rflg, long **roff, long *rcnt)
523 int *nb; /* number of bytes necessary for jump displacements */
524 int i;
525 /* more compact jmp instructions */
526 nb = malloc(jmp_n * sizeof(nb[0]));
527 for (i = 0; i < jmp_n; i++)
528 nb[i] = 4;
529 i_shortjumps(nb);
530 for (i = 0; i < jmp_n; i++) /* filling jmp destinations */
531 oi_at(jmp_off[i], lab_loc[jmp_dst[i]] -
532 jmp_off[i] - nb[i], nb[i]);
533 free(nb);
534 *c_len = mem_len(&cs);
535 *c = mem_get(&cs);
536 *rsym = rel_sym;
537 *rflg = rel_flg;
538 *roff = rel_off;
539 *rcnt = rel_n;
540 rel_sym = NULL;
541 rel_flg = NULL;
542 rel_off = NULL;
543 rel_n = 0;
544 rel_sz = 0;
545 jmp_n = 0;
548 void i_done(void)
550 free(jmp_off);
551 free(jmp_dst);
552 free(jmp_op);
553 free(lab_loc);
556 long i_reg(long op, long *rd, long *r1, long *r2, long *r3, long *tmp)
558 long oc = O_C(op);
559 long bt = O_T(op);
560 *rd = 0;
561 *r1 = 0;
562 *r2 = 0;
563 *r3 = 0;
564 *tmp = 0;
565 if (oc & O_MOV) {
566 *rd = R_TMPS;
567 if (oc & (O_NUM | O_SYM))
568 *r1 = 32;
569 else
570 *r1 = T_SZ(bt) == 1 ? R_BYTE : R_TMPS;
571 return 0;
573 if (oc & O_ADD) {
574 *r1 = R_TMPS;
575 *r2 = oc & O_NUM ? (oc == O_ADD ? 32 : 8) : R_TMPS;
576 return 0;
578 if (oc & O_SHL) {
579 if (oc & O_NUM) {
580 *r1 = R_TMPS;
581 *r2 = 8;
582 } else {
583 *r2 = 1 << R_RCX;
584 *r1 = R_TMPS & ~*r2;
586 return 0;
588 if (oc & O_MUL) {
589 if (oc & O_NUM)
590 return 1;
591 *rd = oc == O_MOD ? (1 << R_RDX) : (1 << R_RAX);
592 *r1 = (1 << R_RAX);
593 *r2 = R_TMPS & ~*rd & ~*r1;
594 if (oc == O_DIV)
595 *r2 &= ~(1 << R_RDX);
596 *tmp = (1 << R_RDX) | (1 << R_RAX);
597 return 0;
599 if (oc & O_CMP) {
600 *rd = 1 << R_RAX;
601 *r1 = R_TMPS;
602 *r2 = oc & O_NUM ? 8 : R_TMPS;
603 return 0;
605 if (oc & O_UOP) {
606 if (oc == O_LNOT)
607 *r1 = 1 << R_RAX;
608 else
609 *r1 = R_TMPS;
610 return 0;
612 if (oc == O_MSET) {
613 *r1 = 1 << R_RDI;
614 *r2 = 1 << R_RAX;
615 *r3 = 1 << R_RCX;
616 *tmp = (1 << R_RDI) | (1 << R_RCX);
617 return 0;
619 if (oc == O_MCPY) {
620 *r1 = 1 << R_RDI;
621 *r2 = 1 << R_RSI;
622 *r3 = 1 << R_RCX;
623 *tmp = (1 << R_RDI) | (1 << R_RSI) | (1 << R_RCX);
624 return 0;
626 if (oc == O_RET) {
627 *r1 = (1 << REG_RET);
628 return 0;
630 if (oc & O_CALL) {
631 *rd = (1 << REG_RET);
632 *r1 = oc & O_SYM ? 0 : R_TMPS;
633 *tmp = R_TMPS & ~R_PERM;
634 return 0;
636 if (oc & O_LD) {
637 *rd = T_SZ(bt) == 1 ? R_BYTE : R_TMPS;
638 *r1 = R_TMPS;
639 *r2 = oc & O_NUM ? 32 : R_TMPS;
640 return 0;
642 if (oc & O_ST) {
643 *r1 = T_SZ(bt) == 1 ? R_BYTE : R_TMPS;
644 *r2 = R_TMPS;
645 *r3 = oc & O_NUM ? 32 : R_TMPS;
646 return 0;
648 if (oc & O_JZ) {
649 *r1 = R_TMPS;
650 return 0;
652 if (oc & O_JCC) {
653 *r1 = R_TMPS;
654 *r2 = oc & O_NUM ? 8 : R_TMPS;
655 return 0;
657 if (oc == O_JMP)
658 return 0;
659 return 1;
/*
 * Return nonzero if n fits in a signed immediate of lim bits, that is
 * if -2^(lim-1) <= n <= 2^(lim-1) - 1.  lim must be between 1 and the
 * number of bits in a long.
 */
int i_imm(long lim, long n)
{
	/*
	 * The shift is done in unsigned long: with a plain int constant,
	 * 1 << 31 overflows for 32-bit immediates.
	 */
	long max = (long) ((1UL << (lim - 1)) - 1);
	/* -max - 1 cannot overflow, unlike n + 1 for the largest long */
	return n <= max && n >= -max - 1;
}
668 long i_ins(long op, long rd, long r1, long r2, long r3)
670 long oc = O_C(op);
671 long bt = O_T(op);
672 if (oc & O_ADD) {
673 if (oc & O_NUM) {
674 if (rd == r1 && r2 <= 127 && r2 >= -128)
675 i_add_imm(op, r1, r1, r2);
676 else
677 i_add_anyimm(rd, r1, r2);
678 } else {
679 i_add(op, r1, r1, r2);
682 if (oc & O_SHL) {
683 if (oc & O_NUM)
684 i_shl_imm(op, r1, r1, r2);
685 else
686 i_shl(op, r1, r1, r2);
688 if (oc & O_MUL) {
689 if (oc == O_MUL)
690 i_mul(R_RAX, r1, r2);
691 if (oc == O_DIV)
692 i_div(op, R_RAX, r1, r2);
693 if (oc == O_MOD)
694 i_div(op, R_RDX, r1, r2);
695 return 0;
697 if (oc & O_CMP) {
698 if (oc & O_NUM)
699 i_cmp_imm(r1, r2);
700 else
701 i_cmp(r1, r2);
702 i_set(op, rd);
703 return 0;
705 if (oc & O_UOP) { /* uop */
706 if (oc == O_NEG)
707 i_neg(r1);
708 if (oc == O_NOT)
709 i_not(r1);
710 if (oc == O_LNOT)
711 i_lnot(r1);
712 return 0;
714 if (oc == O_CALL) {
715 op_rr(I_CALL, 2, r1, LONGSZ);
716 return 0;
718 if (oc == (O_CALL | O_SYM)) {
719 os("\xe8", 1); /* call $x */
720 i_rel(r1, OUT_CS | OUT_RLREL, opos());
721 oi(-4, 4);
722 return 0;
724 if (oc == (O_MOV | O_SYM)) {
725 i_sym(rd, r1, r2);
726 return 0;
728 if (oc == (O_MOV | O_NUM)) {
729 i_num(rd, r1);
730 return 0;
732 if (oc == O_MSET) {
733 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
734 return 0;
736 if (oc == O_MCPY) {
737 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
738 return 0;
740 if (oc == O_RET) {
741 jmp_ret = opos();
742 jmp_add(O_JMP, i_jmp(op, 4), 0);
743 return 0;
745 if (oc == (O_LD | O_NUM)) {
746 op_rm(movrx_op(bt, I_MOVR), rd, r1, r2, movrx_bt(bt));
747 return 0;
749 if (oc == (O_ST | O_NUM)) {
750 op_rm(I_MOV, r1, r2, r3, bt);
751 return 0;
753 if (oc == O_MOV) {
754 i_cast(rd, r1, bt);
755 return 0;
757 if (oc & O_JXX) {
758 i_jcmp(op, r1, r2);
759 jmp_add(op, i_jmp(op, 4), r3 + 1);
760 return 0;
762 return 1;