int: try to add address offsets before instruction immediates
[neatcc.git] / int.c
blob5d6536cebaae143b53aa64187081d3ce92f6915d
1 /* neatcc intermediate code generation */
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "ncc.h"
7 static struct ic *ic; /* intermediate code */
8 static long ic_n, ic_sz; /* number of instructions in ic[] */
9 static long iv[NTMPS]; /* operand stack */
10 static long iv_n; /* number of values in iv[] */
11 static long *lab_loc; /* label locations */
12 static long lab_n, lab_sz; /* number of labels in lab_loc[] */
13 static long lab_last; /* the last label target */
15 static int io_num(void);
16 static int io_mul2(void);
17 static int io_cmp(void);
18 static int io_jmp(void);
19 static int io_addr(void);
20 static int io_loc(void);
21 static int io_imm(void);
22 static int io_call(void);
23 static void io_deadcode(void);
25 static struct ic *ic_new(void)
27 if (ic_n == ic_sz) {
28 ic_sz = MAX(128, ic_sz * 2);
29 ic = mextend(ic, ic_n, ic_sz, sizeof(*ic));
31 return &ic[ic_n++];
34 static struct ic *ic_put(long op, long arg0, long arg1, long arg2)
36 struct ic *c = ic_new();
37 c->op = op;
38 c->arg0 = arg0;
39 c->arg1 = arg1;
40 c->arg2 = arg2;
41 return c;
44 static void ic_back(long pos)
46 int i;
47 for (i = pos; i < ic_n; i++)
48 if (ic[i].op & O_CALL)
49 free(ic[i].args);
50 ic_n = pos;
53 static long iv_pop(void)
55 return iv[--iv_n];
58 static long iv_get(int n)
60 return iv[iv_n - n - 1];
63 static long iv_new(void)
65 iv[iv_n] = ic_n;
66 return iv[iv_n++];
69 static void iv_put(long n)
71 iv[iv_n++] = n;
74 static void iv_drop(int n)
76 iv_n = MAX(0, iv_n - n);
79 static void iv_swap(int x, int y)
81 long v = iv[iv_n - x - 1];
82 iv[iv_n - x - 1] = iv[iv_n - y - 1];
83 iv[iv_n - y - 1] = v;
86 static void iv_dup(void)
88 iv[iv_n] = iv[iv_n - 1];
89 iv_n++;
92 void o_num(long n)
94 ic_put(O_MOV | O_NUM, iv_new(), n, 0);
97 void o_local(long id)
99 ic_put(O_MOV | O_LOC, iv_new(), id, 0);
102 void o_sym(char *sym)
104 ic_put(O_MOV | O_SYM, iv_new(), out_sym(sym), 0);
107 void o_tmpdrop(int n)
109 iv_drop(n >= 0 ? n : iv_n);
112 void o_tmpswap(void)
114 iv_swap(0, 1);
117 void o_tmpcopy(void)
119 iv_dup();
122 void o_bop(long op)
124 int r1 = iv_pop();
125 int r2 = iv_pop();
126 ic_put(op, iv_new(), r2, r1);
127 io_num() && io_mul2() && io_addr() && io_imm();
130 void o_uop(long op)
132 int r1 = iv_pop();
133 ic_put(op, iv_new(), r1, 0);
134 if (io_num())
135 io_cmp();
138 void o_assign(long bt)
140 ic_put(O_MK(O_ST | O_NUM, bt), iv_get(0), iv_get(1), 0);
141 iv_swap(0, 1);
142 iv_pop();
143 io_loc();
146 void o_deref(long bt)
148 int r1 = iv_pop();
149 ic_put(O_MK(O_LD | O_NUM, bt), iv_new(), r1, 0);
150 io_loc();
153 void o_cast(long bt)
155 if (T_SZ(bt) != ULNG) {
156 int r1 = iv_pop();
157 ic_put(O_MK(O_MOV, bt), iv_new(), r1, 0);
158 io_num();
162 void o_memcpy(void)
164 int r2 = iv_pop();
165 int r1 = iv_pop();
166 int r0 = iv_pop();
167 ic_put(O_MCPY, r0, r1, r2);
170 void o_memset(void)
172 int r2 = iv_pop();
173 int r1 = iv_pop();
174 int r0 = iv_pop();
175 ic_put(O_MSET, r0, r1, r2);
178 void o_call(int argc, int ret)
180 struct ic *c;
181 long *args = malloc(argc * sizeof(c->args[0]));
182 int r1, i;
183 for (i = argc - 1; i >= 0; --i)
184 args[i] = iv_pop();
185 r1 = iv_pop();
186 c = ic_put(O_CALL, iv_new(), r1, argc);
187 c->args = args;
188 iv_drop(ret == 0);
189 io_call();
192 void o_ret(int ret)
194 if (!ret)
195 o_num(0);
196 ic_put(O_RET, iv_pop(), 0, 0);
199 void o_label(long id)
201 while (id >= lab_sz) {
202 lab_sz = MAX(128, lab_sz * 2);
203 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
205 while (lab_n <= id)
206 lab_loc[lab_n++] = -1;
207 lab_loc[id] = ic_n;
208 lab_last = ic_n;
211 void o_jmp(long id)
213 ic_put(O_JMP, 0, 0, id);
216 void o_jz(long id)
218 ic_put(O_JZ, iv_pop(), 0, id);
219 io_jmp();
222 int o_popnum(long *n)
224 if (ic_num(ic, iv_get(0), n))
225 return 1;
226 iv_drop(1);
227 return 0;
230 int o_popsym(long *sym, long *off)
232 if (ic_sym(ic, iv_get(0), sym, off))
233 return 1;
234 iv_drop(1);
235 return 0;
238 long o_mark(void)
240 return ic_n;
243 void o_back(long mark)
245 ic_back(mark);
248 void ic_get(struct ic **c, long *n)
250 int i;
251 if (!ic_n || ~ic[ic_n - 1].op & O_RET || lab_last == ic_n)
252 o_ret(0);
253 for (i = 0; i < ic_n; i++) /* filling branch targets */
254 if (ic[i].op & O_JXX)
255 ic[i].arg2 = lab_loc[ic[i].arg2];
256 io_deadcode(); /* removing dead code */
257 *c = ic;
258 *n = ic_n;
259 ic = NULL;
260 ic_n = 0;
261 ic_sz = 0;
262 iv_n = 0;
263 free(lab_loc);
264 lab_loc = NULL;
265 lab_n = 0;
266 lab_sz = 0;
267 lab_last = 0;
270 void ic_free(struct ic *ic)
272 if (ic->op & O_CALL)
273 free(ic->args);
276 /* intermediate code queries */
278 static long cb(long op, long a, long b)
280 switch (O_C(op)) {
281 case O_ADD:
282 return a + b;
283 case O_SUB:
284 return a - b;
285 case O_AND:
286 return a & b;
287 case O_OR:
288 return a | b;
289 case O_XOR:
290 return a ^ b;
291 case O_MUL:
292 return a * b;
293 case O_DIV:
294 return a / b;
295 case O_MOD:
296 return a % b;
297 case O_SHL:
298 return a << b;
299 case O_SHR:
300 return O_T(op) & T_MSIGN ? a >> b : (unsigned long) a >> b;
301 case O_LT:
302 return a < b;
303 case O_GT:
304 return a > b;
305 case O_LE:
306 return a <= b;
307 case O_GE:
308 return a >= b;
309 case O_EQ:
310 return a == b;
311 case O_NE:
312 return a != b;
314 return 0;
317 static long cu(int op, long i)
319 switch (O_C(op)) {
320 case O_NEG:
321 return -i;
322 case O_NOT:
323 return ~i;
324 case O_LNOT:
325 return !i;
327 return 0;
330 static long c_cast(long n, unsigned bt)
332 if (!(bt & T_MSIGN) && T_SZ(bt) != ULNG)
333 n &= ((1l << (long) (T_SZ(bt) * 8)) - 1);
334 if (bt & T_MSIGN && T_SZ(bt) != ULNG &&
335 n > (1l << (T_SZ(bt) * 8 - 1)))
336 n = -((1l << (T_SZ(bt) * 8)) - n);
337 return n;
340 int ic_num(struct ic *ic, long iv, long *n)
342 long n1, n2;
343 long oc = O_C(ic[iv].op);
344 long bt = O_T(ic[iv].op);
345 if (oc & O_MOV && oc & O_NUM) {
346 *n = ic[iv].arg1;
347 return 0;
349 if (oc & O_BOP) {
350 if (ic_num(ic, ic[iv].arg1, &n1))
351 return 1;
352 if (ic_num(ic, ic[iv].arg2, &n2))
353 return 1;
354 *n = cb(ic[iv].op, n1, n2);
355 return 0;
357 if (oc & O_UOP) {
358 if (ic_num(ic, ic[iv].arg1, &n1))
359 return 1;
360 *n = cu(ic[iv].op, n1);
361 return 0;
363 if (oc & O_MOV && !(oc & (O_NUM | O_LOC | O_SYM))) {
364 if (ic_num(ic, ic[iv].arg1, &n1))
365 return 1;
366 *n = c_cast(n1, bt);
367 return 0;
369 return 1;
372 int ic_sym(struct ic *ic, long iv, long *sym, long *off)
374 long n;
375 long oc = O_C(ic[iv].op);
376 if (oc & O_MOV && oc & O_SYM) {
377 *sym = ic[iv].arg1;
378 *off = ic[iv].arg2;
379 return 0;
381 if (oc == O_ADD) {
382 if ((ic_sym(ic, ic[iv].arg1, sym, off) ||
383 ic_num(ic, ic[iv].arg2, &n)) &&
384 (ic_sym(ic, ic[iv].arg2, sym, off) ||
385 ic_num(ic, ic[iv].arg1, &n)))
386 return 1;
387 *off += n;
388 return 0;
390 if (oc == O_SUB) {
391 if (ic_sym(ic, ic[iv].arg1, sym, off) ||
392 ic_num(ic, ic[iv].arg2, &n))
393 return 1;
394 *off -= n;
395 return 0;
397 return 1;
400 static int ic_off(struct ic *ic, long iv, long *base_iv, long *off)
402 long n;
403 long oc = O_C(ic[iv].op);
404 if (oc != O_ADD && oc != O_SUB) {
405 *base_iv = iv;
406 *off = 0;
407 return 0;
409 if (oc == O_ADD) {
410 if ((ic_off(ic, ic[iv].arg1, base_iv, off) ||
411 ic_num(ic, ic[iv].arg2, &n)) &&
412 (ic_off(ic, ic[iv].arg2, base_iv, off) ||
413 ic_num(ic, ic[iv].arg1, &n)))
414 return 1;
415 *off += n;
416 return 0;
418 if (oc == O_SUB) {
419 if (ic_off(ic, ic[iv].arg1, base_iv, off) ||
420 ic_num(ic, ic[iv].arg2, &n))
421 return 1;
422 *off -= n;
423 return 0;
425 return 1;
428 /* number of register arguments */
429 int ic_regcnt(struct ic *ic)
431 long o = ic->op;
432 if (o & O_BOP)
433 return o & (O_NUM | O_SYM | O_LOC) ? 2 : 3;
434 if (o & O_UOP)
435 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
436 if (o & O_CALL)
437 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
438 if (o & O_MOV)
439 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
440 if (o & O_MEM)
441 return 3;
442 if (o & O_JMP)
443 return 0;
444 if (o & O_JZ)
445 return 1;
446 if (o & O_JCC)
447 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
448 if (o & O_RET)
449 return 1;
450 if (o & (O_LD | O_ST) && o & (O_SYM | O_LOC))
451 return 1;
452 if (o & (O_LD | O_ST))
453 return o & O_NUM ? 2 : 3;
454 return 0;
457 /* return the values written to and read from in the given instruction */
458 static void ic_info(struct ic *ic, long **w, long **r1, long **r2, long **r3)
460 long n = ic_regcnt(ic);
461 long o = ic->op & O_OUT;
462 *r1 = NULL;
463 *r2 = NULL;
464 *r3 = NULL;
465 *w = NULL;
466 if (o) {
467 *w = &ic->arg0;
468 *r1 = n >= 2 ? &ic->arg1 : NULL;
469 *r2 = n >= 3 ? &ic->arg2 : NULL;
470 } else {
471 *r1 = n >= 1 ? &ic->arg0 : NULL;
472 *r2 = n >= 2 ? &ic->arg1 : NULL;
473 *r3 = n >= 3 ? &ic->arg2 : NULL;
478 * The returned array indicates the last instruction in
479 * which the value produced by each instruction is used.
481 long *ic_lastuse(struct ic *ic, long ic_n)
483 long *luse = calloc(ic_n, sizeof(luse[0]));
484 int i, j;
485 for (i = ic_n - 1; i >= 0; --i) {
486 long *w, *r1, *r2, *r3;
487 ic_info(ic + i, &w, &r1, &r2, &r3);
488 if (!luse[i])
489 if (!w || ic[i].op & O_CALL)
490 luse[i] = -1;
491 if (!luse[i])
492 continue;
493 if (r1 && !luse[*r1])
494 luse[*r1] = i;
495 if (r2 && !luse[*r2])
496 luse[*r2] = i;
497 if (r3 && !luse[*r3])
498 luse[*r3] = i;
499 if (ic[i].op & O_CALL)
500 for (j = 0; j < ic[i].arg2; j++)
501 if (!luse[ic[i].args[j]])
502 luse[ic[i].args[j]] = i;
504 return luse;
507 /* intermediate code optimisations */
509 /* constant folding */
510 static int io_num(void)
512 long n;
513 if (!ic_num(ic, iv_get(0), &n)) {
514 iv_drop(1);
515 o_num(n);
516 return 0;
518 return 1;
521 static int log2a(unsigned long n)
523 int i = 0;
524 for (i = 0; i < LONGSZ * 8; i++)
525 if (n & (1u << i))
526 break;
527 if (i == LONGSZ * 8 || !(n >> (i + 1)))
528 return i;
529 return -1;
532 static long iv_num(long n)
534 o_num(n);
535 return iv_pop();
538 /* optimised multiplication operations for powers of two */
539 static int io_mul2(void)
541 long iv = iv_get(0);
542 long n, p;
543 long r1, r2;
544 long oc = O_C(ic[iv].op);
545 long bt = O_T(ic[iv].op);
546 if (!(oc & O_MUL))
547 return 1;
548 if (oc == O_MUL && !ic_num(ic, ic[iv].arg1, &n)) {
549 long t = ic[iv].arg1;
550 ic[iv].arg1 = ic[iv].arg2;
551 ic[iv].arg2 = t;
553 if (ic_num(ic, ic[iv].arg2, &n))
554 return 1;
555 p = log2a(n);
556 if (n && p < 0)
557 return 1;
558 if (oc == O_MUL) {
559 iv_drop(1);
560 if (n == 1) {
561 iv_put(ic[iv].arg1);
562 return 0;
564 if (n == 0) {
565 o_num(0);
566 return 0;
568 r2 = iv_num(p);
569 ic_put(O_MK(O_SHL, ULNG), iv_new(), ic[iv].arg1, r2);
570 return 0;
572 if (oc == O_DIV && ~bt & T_MSIGN) {
573 iv_drop(1);
574 if (n == 1) {
575 iv_put(ic[iv].arg1);
576 return 0;
578 r2 = iv_num(p);
579 ic_put(O_MK(O_SHR, ULNG), iv_new(), ic[iv].arg1, r2);
580 return 0;
582 if (oc == O_MOD && ~bt & T_MSIGN) {
583 iv_drop(1);
584 if (n == 1) {
585 o_num(0);
586 return 0;
588 r2 = iv_num(LONGSZ * 8 - p);
589 ic_put(O_MK(O_SHL, ULNG), iv_new(), ic[iv].arg1, r2);
590 r1 = iv_pop();
591 r2 = iv_num(LONGSZ * 8 - p);
592 ic_put(O_MK(O_SHR, ULNG), iv_new(), r1, r2);
593 return 0;
595 return 1;
598 /* optimise comparison */
599 static int io_cmp(void)
601 long iv = iv_get(0);
602 long cmp = ic[iv].arg1;
603 if (O_C(ic[iv].op) == O_LNOT && ic[cmp].op & O_CMP) {
604 iv_drop(1);
605 ic[cmp].op ^= 1;
606 iv_put(ic[cmp].arg0);
607 return 0;
609 return 1;
612 /* optimise branch instructions after comparison */
613 static int io_jmp(void)
615 struct ic *c = &ic[ic_n - 1];
616 long oc = O_C(c->op);
617 if (oc & O_JZ && O_C(ic[c->arg0].op) == O_LNOT) {
618 c->arg0 = ic[c->arg0].arg1;
619 c->op ^= 1;
620 return 0;
622 if (oc & O_JZ && O_C(ic[c->arg0].op) & O_CMP) {
623 long cop = (ic[c->arg0].op & ~O_CMP) | O_JCC;
624 c->op = O_C(c->op) == O_JZ ? cop ^ 1 : cop;
625 c->arg1 = ic[c->arg0].arg2;
626 c->arg0 = ic[c->arg0].arg1;
627 return 0;
629 return 1;
632 /* optimise accessing locals or symbols with an offset */
633 static int io_addr(void)
635 long iv, off;
636 if (ic_off(ic, iv_get(0), &iv, &off) || iv == iv_get(0))
637 return 1;
638 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
639 iv_drop(1);
640 ic_put(O_MOV | O_LOC, iv_new(), ic[iv].arg1, ic[iv].arg2 + off);
641 return 0;
643 if (ic[iv].op & O_MOV && ic[iv].op & O_SYM) {
644 iv_drop(1);
645 ic_put(O_MOV | O_SYM, iv_new(), ic[iv].arg1, ic[iv].arg2 + off);
646 return 0;
648 return 1;
651 /* optimise loading and storing locals */
652 static int io_loc(void)
654 struct ic *c = &ic[ic_n - 1];
655 long iv, off;
656 if (!(c->op & (O_LD | O_ST)) || !(c->op & O_NUM))
657 return 1;
658 if (ic_off(ic, c->arg1, &iv, &off))
659 return 1;
660 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
661 c->op = (c->op & ~O_NUM) | O_LOC;
662 c->arg1 = ic[iv].arg1;
663 c->arg2 += ic[iv].arg2 + off;
664 return 0;
666 c->arg1 = iv;
667 c->arg2 += off;
668 return 0;
671 static int imm_ok(long op, long n, int arg)
673 long m0, m1, m2, mt;
674 if (i_reg(op | O_NUM, &m0, &m1, &m2, &mt))
675 return 0;
676 return i_imm(arg == 2 ? m2 : m1, n);
679 /* use instruction immediates */
680 static int io_imm(void)
682 struct ic *c = &ic[ic_n - 1];
683 long oc = O_C(c->op);
684 long n;
685 if (oc & (O_NUM | O_LOC | O_SYM))
686 return 1;
687 if (oc == O_ADD || oc == O_MUL || oc == O_AND || oc == O_OR ||
688 oc == O_XOR || oc == O_EQ || oc == O_NE) {
689 if (!ic_num(ic, c->arg1, &n)) {
690 long t = c->arg1;
691 c->arg1 = c->arg2;
692 c->arg2 = t;
695 if (oc == O_LT || oc == O_GE || oc == O_LE || oc == O_GT) {
696 if (!ic_num(ic, c->arg1, &n)) {
697 int t = c->arg1;
698 c->arg1 = c->arg2;
699 c->arg2 = t;
700 c->op ^= 1;
703 if (oc & O_JCC && !ic_num(ic, c->arg0, &n)) {
704 int t = c->arg0;
705 c->arg0 = c->arg1;
706 c->arg1 = t;
707 c->op ^= 1;
709 if (oc & O_JCC && !ic_num(ic, c->arg1, &n) && imm_ok(c->op, n, 1)) {
710 c->op |= O_NUM;
711 c->arg1 = n;
712 return 0;
714 if (!(oc & O_BOP) || ic_num(ic, c->arg2, &n))
715 return 1;
716 if ((oc == O_ADD || oc == O_SUB || oc & O_SHL) && n == 0) {
717 iv_drop(1);
718 iv_put(c->arg1);
719 return 0;
721 if (imm_ok(c->op, n, 2)) {
722 c->op |= O_NUM;
723 c->arg2 = n;
724 return 0;
726 return 1;
729 /* calling symbols */
730 static int io_call(void)
732 struct ic *c = &ic[ic_n - 1];
733 long sym, off;
734 if (c->op & O_CALL && !ic_sym(ic, c->arg1, &sym, &off) && !off) {
735 c->op |= O_SYM;
736 c->arg1 = sym;
737 return 0;
739 return 1;
742 /* remove dead code */
743 static void io_deadcode(void)
745 char *live;
746 long *nidx;
747 long src = 0, dst = 0;
748 int i, j;
749 /* liveness analysis */
750 live = calloc(ic_n, sizeof(live[0]));
751 for (i = ic_n - 1; i >= 0; i--) {
752 long *w, *r1, *r2, *r3;
753 ic_info(ic + i, &w, &r1, &r2, &r3);
754 if (!w || ic[i].op & O_CALL)
755 live[i] = 1;
756 if (!live[i])
757 continue;
758 if (r1)
759 live[*r1] = 1;
760 if (r2)
761 live[*r2] = 1;
762 if (r3)
763 live[*r3] = 1;
764 if (ic[i].op & O_CALL)
765 for (j = 0; j < ic[i].arg2; j++)
766 live[ic[i].args[j]] = 1;
768 /* the new indices of intermediate instructions */
769 nidx = calloc(ic_n, sizeof(nidx[0]));
770 while (src < ic_n) {
771 while (src < ic_n && !live[src]) {
772 nidx[src] = dst;
773 ic_free(&ic[src++]);
775 if (src < ic_n) {
776 nidx[src] = dst;
777 if (src != dst)
778 memcpy(ic + dst, ic + src, sizeof(ic[src]));
779 src++;
780 dst++;
783 ic_n = dst;
784 /* adjusting arguments and branch targets */
785 for (i = 0; i < ic_n; i++) {
786 long *w, *r1, *r2, *r3;
787 ic_info(ic + i, &w, &r1, &r2, &r3);
788 if (r1)
789 *r1 = nidx[*r1];
790 if (r2)
791 *r2 = nidx[*r2];
792 if (r3)
793 *r3 = nidx[*r3];
794 if (w)
795 *w = nidx[*w];
796 if (ic[i].op & O_JXX)
797 ic[i].arg2 = nidx[ic[i].arg2];
798 if (ic[i].op & O_CALL)
799 for (j = 0; j < ic[i].arg2; j++)
800 ic[i].args[j] = nidx[ic[i].args[j]];
802 free(live);
803 free(nidx);