int: load assignment destination last if possible
[neatcc.git] / int.c
blobbdcf4f0986f790271660f4846d57c1c2b2324b00
1 /* neatcc intermediate code generation */
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "ncc.h"
7 static struct ic *ic; /* intermediate code */
8 static long ic_n, ic_sz; /* number of instructions in ic[] */
9 static long iv[NTMPS]; /* operand stack */
10 static long iv_n; /* number of values in iv[] */
11 static long *lab_loc; /* label locations */
12 static long lab_n, lab_sz; /* number of labels in lab_loc[] */
13 static long lab_last; /* the last label target */
15 static int io_num(void);
16 static int io_mul2(void);
17 static int io_cmp(void);
18 static int io_jmp(void);
19 static int io_addr(void);
20 static int io_loc(void);
21 static int io_imm(void);
22 static int io_call(void);
23 static void io_deadcode(void);
25 static void iv_put(long n);
27 static struct ic *ic_put(long op, long arg1, long arg2, long arg3)
29 struct ic *c;
30 if (ic_n == ic_sz) {
31 ic_sz = MAX(128, ic_sz * 2);
32 ic = mextend(ic, ic_n, ic_sz, sizeof(*ic));
34 c = &ic[ic_n++];
35 c->op = op;
36 c->a1 = arg1;
37 c->a2 = arg2;
38 c->a3 = arg3;
39 if (op & O_OUT)
40 iv_put(ic_n - 1);
41 return c;
44 static void ic_back(long pos)
46 int i;
47 for (i = pos; i < ic_n; i++)
48 if (ic[i].op & O_CALL)
49 free(ic[i].args);
50 ic_n = pos;
53 static long iv_pop(void)
55 return iv[--iv_n];
58 static long iv_get(int n)
60 return iv[iv_n - n - 1];
63 static void iv_put(long n)
65 iv[iv_n++] = n;
68 static void iv_drop(int n)
70 iv_n = MAX(0, iv_n - n);
73 static void iv_swap(int x, int y)
75 long v = iv[iv_n - x - 1];
76 iv[iv_n - x - 1] = iv[iv_n - y - 1];
77 iv[iv_n - y - 1] = v;
80 static void iv_dup(void)
82 iv[iv_n] = iv[iv_n - 1];
83 iv_n++;
86 void o_num(long n)
88 ic_put(O_MOV | O_NUM, n, 0, 0);
91 void o_local(long id)
93 ic_put(O_MOV | O_LOC, id, 0, 0);
96 void o_sym(char *sym)
98 ic_put(O_MOV | O_SYM, out_sym(sym), 0, 0);
101 void o_tmpdrop(int n)
103 iv_drop(n >= 0 ? n : iv_n);
106 void o_tmpswap(void)
108 iv_swap(0, 1);
111 void o_tmpcopy(void)
113 iv_dup();
116 /* return one if the given value is constant */
117 static int ic_const(long iv)
119 long oc = O_C(ic[iv].op);
120 return oc & O_MOV && oc & (O_NUM | O_SYM | O_LOC);
123 /* return one if the given value is a simple load */
124 static int ic_load(long iv)
126 long oc = O_C(ic[iv].op);
127 int i;
128 if (oc & O_LD && oc & (O_NUM | O_SYM | O_LOC)) {
129 for (i = iv + 1; i < ic_n; i++)
130 if (ic[i].op & O_ST)
131 return 0;
132 return 1;
134 return 0;
137 void o_bop(long op)
139 int r1 = iv_pop();
140 int r2 = iv_pop();
141 if (ic_const(r2) && !ic_const(r1)) { /* load constants last */
142 ic_put(ic[r2].op, ic[r2].a1, ic[r2].a2, ic[r2].a3);
143 r2 = iv_pop();
145 ic_put(op, r2, r1, 0);
146 io_num() && io_mul2() && io_addr() && io_imm();
149 void o_uop(long op)
151 int r1 = iv_pop();
152 ic_put(op, r1, 0, 0);
153 if (io_num())
154 io_cmp();
157 void o_assign(long bt)
159 int rv = iv_pop();
160 int lv = iv_pop();
161 if (ic_const(lv) || ic_load(lv)) { /* load constants last */
162 ic_put(ic[lv].op, ic[lv].a1, ic[lv].a2, ic[lv].a3);
163 lv = iv_pop();
165 ic_put(O_MK(O_ST | O_NUM, bt), rv, lv, 0);
166 iv_put(rv);
167 io_loc();
170 void o_deref(long bt)
172 int r1 = iv_pop();
173 ic_put(O_MK(O_LD | O_NUM, bt), r1, 0, 0);
174 io_loc();
177 void o_cast(long bt)
179 if (T_SZ(bt) != ULNG) {
180 int r1 = iv_pop();
181 ic_put(O_MK(O_MOV, bt), r1, 0, 0);
182 io_num();
186 void o_memcpy(void)
188 int r2 = iv_pop();
189 int r1 = iv_pop();
190 int r0 = iv_pop();
191 ic_put(O_MCPY, r0, r1, r2);
194 void o_memset(void)
196 int r2 = iv_pop();
197 int r1 = iv_pop();
198 int r0 = iv_pop();
199 ic_put(O_MSET, r0, r1, r2);
202 void o_call(int argc, int ret)
204 struct ic *c;
205 long *args = malloc(argc * sizeof(c->args[0]));
206 int r1, i;
207 for (i = argc - 1; i >= 0; --i)
208 args[i] = iv_pop();
209 for (i = argc - 1; i >= 0; --i) {
210 int iv = args[i];
211 if (ic_const(iv) || ic_load(iv)) { /* load constants last */
212 ic_put(ic[iv].op, ic[iv].a1, ic[iv].a2, ic[iv].a3);
213 args[i] = iv_pop();
216 r1 = iv_pop();
217 c = ic_put(O_CALL, r1, 0, argc);
218 c->args = args;
219 iv_drop(ret == 0);
220 io_call();
223 void o_ret(int ret)
225 if (!ret)
226 o_num(0);
227 ic_put(O_RET, iv_pop(), 0, 0);
230 void o_label(long id)
232 while (id >= lab_sz) {
233 lab_sz = MAX(128, lab_sz * 2);
234 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
236 while (lab_n <= id)
237 lab_loc[lab_n++] = -1;
238 lab_loc[id] = ic_n;
239 lab_last = ic_n;
242 void o_jmp(long id)
244 ic_put(O_JMP, 0, 0, id);
247 void o_jz(long id)
249 ic_put(O_JZ, iv_pop(), 0, id);
250 io_jmp();
253 int o_popnum(long *n)
255 if (ic_num(ic, iv_get(0), n))
256 return 1;
257 iv_drop(1);
258 return 0;
261 int o_popsym(long *sym, long *off)
263 if (ic_sym(ic, iv_get(0), sym, off))
264 return 1;
265 iv_drop(1);
266 return 0;
269 long o_mark(void)
271 return ic_n;
274 void o_back(long mark)
276 ic_back(mark);
279 void ic_get(struct ic **c, long *n)
281 int i;
282 if (!ic_n || ~ic[ic_n - 1].op & O_RET || lab_last == ic_n)
283 o_ret(0);
284 for (i = 0; i < ic_n; i++) /* filling branch targets */
285 if (ic[i].op & O_JXX)
286 ic[i].a3 = lab_loc[ic[i].a3];
287 io_deadcode(); /* removing dead code */
288 *c = ic;
289 *n = ic_n;
290 ic = NULL;
291 ic_n = 0;
292 ic_sz = 0;
293 iv_n = 0;
294 free(lab_loc);
295 lab_loc = NULL;
296 lab_n = 0;
297 lab_sz = 0;
298 lab_last = 0;
301 void ic_free(struct ic *ic)
303 if (ic->op & O_CALL)
304 free(ic->args);
307 /* intermediate code queries */
309 static long cb(long op, long a, long b)
311 switch (O_C(op)) {
312 case O_ADD:
313 return a + b;
314 case O_SUB:
315 return a - b;
316 case O_AND:
317 return a & b;
318 case O_OR:
319 return a | b;
320 case O_XOR:
321 return a ^ b;
322 case O_MUL:
323 return a * b;
324 case O_DIV:
325 return a / b;
326 case O_MOD:
327 return a % b;
328 case O_SHL:
329 return a << b;
330 case O_SHR:
331 return O_T(op) & T_MSIGN ? a >> b : (unsigned long) a >> b;
332 case O_LT:
333 return a < b;
334 case O_GT:
335 return a > b;
336 case O_LE:
337 return a <= b;
338 case O_GE:
339 return a >= b;
340 case O_EQ:
341 return a == b;
342 case O_NE:
343 return a != b;
345 return 0;
348 static long cu(int op, long i)
350 switch (O_C(op)) {
351 case O_NEG:
352 return -i;
353 case O_NOT:
354 return ~i;
355 case O_LNOT:
356 return !i;
358 return 0;
361 static long c_cast(long n, unsigned bt)
363 if (!(bt & T_MSIGN) && T_SZ(bt) != ULNG)
364 n &= ((1l << (long) (T_SZ(bt) * 8)) - 1);
365 if (bt & T_MSIGN && T_SZ(bt) != ULNG &&
366 n > (1l << (T_SZ(bt) * 8 - 1)))
367 n = -((1l << (T_SZ(bt) * 8)) - n);
368 return n;
371 int ic_num(struct ic *ic, long iv, long *n)
373 long n1, n2;
374 long oc = O_C(ic[iv].op);
375 long bt = O_T(ic[iv].op);
376 if (oc & O_MOV && oc & O_NUM) {
377 *n = ic[iv].a1;
378 return 0;
380 if (oc & O_BOP) {
381 if (ic_num(ic, ic[iv].a1, &n1))
382 return 1;
383 if (ic_num(ic, ic[iv].a2, &n2))
384 return 1;
385 *n = cb(ic[iv].op, n1, n2);
386 return 0;
388 if (oc & O_UOP) {
389 if (ic_num(ic, ic[iv].a1, &n1))
390 return 1;
391 *n = cu(ic[iv].op, n1);
392 return 0;
394 if (oc & O_MOV && !(oc & (O_NUM | O_LOC | O_SYM))) {
395 if (ic_num(ic, ic[iv].a1, &n1))
396 return 1;
397 *n = c_cast(n1, bt);
398 return 0;
400 return 1;
403 int ic_sym(struct ic *ic, long iv, long *sym, long *off)
405 long n;
406 long oc = O_C(ic[iv].op);
407 if (oc & O_MOV && oc & O_SYM) {
408 *sym = ic[iv].a1;
409 *off = ic[iv].a2;
410 return 0;
412 if (oc == O_ADD) {
413 if ((ic_sym(ic, ic[iv].a1, sym, off) ||
414 ic_num(ic, ic[iv].a2, &n)) &&
415 (ic_sym(ic, ic[iv].a2, sym, off) ||
416 ic_num(ic, ic[iv].a1, &n)))
417 return 1;
418 *off += n;
419 return 0;
421 if (oc == O_SUB) {
422 if (ic_sym(ic, ic[iv].a1, sym, off) ||
423 ic_num(ic, ic[iv].a2, &n))
424 return 1;
425 *off -= n;
426 return 0;
428 return 1;
431 static int ic_off(struct ic *ic, long iv, long *base_iv, long *off)
433 long n;
434 long oc = O_C(ic[iv].op);
435 if (oc != O_ADD && oc != O_SUB) {
436 *base_iv = iv;
437 *off = 0;
438 return 0;
440 if (oc == O_ADD) {
441 if ((ic_off(ic, ic[iv].a1, base_iv, off) ||
442 ic_num(ic, ic[iv].a2, &n)) &&
443 (ic_off(ic, ic[iv].a2, base_iv, off) ||
444 ic_num(ic, ic[iv].a1, &n)))
445 return 1;
446 *off += n;
447 return 0;
449 if (oc == O_SUB) {
450 if (ic_off(ic, ic[iv].a1, base_iv, off) ||
451 ic_num(ic, ic[iv].a2, &n))
452 return 1;
453 *off -= n;
454 return 0;
456 return 1;
459 /* number of register arguments */
460 int ic_regcnt(struct ic *ic)
462 long o = ic->op;
463 if (o & O_BOP)
464 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
465 if (o & O_UOP)
466 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
467 if (o & O_CALL)
468 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
469 if (o & O_MOV)
470 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
471 if (o & O_MEM)
472 return 3;
473 if (o & O_JMP)
474 return 0;
475 if (o & O_JZ)
476 return 1;
477 if (o & O_JCC)
478 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
479 if (o & O_RET)
480 return 1;
481 if (o & O_LD)
482 return ((o & O_NUM) != 0);
483 if (o & O_ST)
484 return 1 + ((o & O_NUM) != 0);
485 return 0;
489 * The returned array indicates the last instruction in
490 * which the value produced by each instruction is used.
492 long *ic_lastuse(struct ic *ic, long ic_n)
494 long *luse = calloc(ic_n, sizeof(luse[0]));
495 int i, j;
496 for (i = ic_n - 1; i >= 0; --i) {
497 int n = ic_regcnt(ic + i);
498 if (!luse[i])
499 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
500 luse[i] = -1;
501 if (!luse[i])
502 continue;
503 if (n >= 1 && !luse[ic[i].a1])
504 luse[ic[i].a1] = i;
505 if (n >= 2 && !luse[ic[i].a2])
506 luse[ic[i].a2] = i;
507 if (n >= 3 && !luse[ic[i].a3])
508 luse[ic[i].a3] = i;
509 if (ic[i].op & O_CALL)
510 for (j = 0; j < ic[i].a3; j++)
511 if (!luse[ic[i].args[j]])
512 luse[ic[i].args[j]] = i;
514 return luse;
517 /* intermediate code optimisations */
519 /* constant folding */
520 static int io_num(void)
522 long n;
523 if (!ic_num(ic, iv_get(0), &n)) {
524 iv_drop(1);
525 o_num(n);
526 return 0;
528 return 1;
531 static int log2a(unsigned long n)
533 int i = 0;
534 for (i = 0; i < LONGSZ * 8; i++)
535 if (n & (1u << i))
536 break;
537 if (i == LONGSZ * 8 || !(n >> (i + 1)))
538 return i;
539 return -1;
542 static long iv_num(long n)
544 o_num(n);
545 return iv_pop();
548 /* optimised multiplication operations for powers of two */
549 static int io_mul2(void)
551 long iv = iv_get(0);
552 long n, p;
553 long r1, r2;
554 long oc = O_C(ic[iv].op);
555 long bt = O_T(ic[iv].op);
556 if (!(oc & O_MUL))
557 return 1;
558 if (oc == O_MUL && !ic_num(ic, ic[iv].a1, &n)) {
559 long t = ic[iv].a1;
560 ic[iv].a1 = ic[iv].a2;
561 ic[iv].a2 = t;
563 if (ic_num(ic, ic[iv].a2, &n))
564 return 1;
565 p = log2a(n);
566 if (n && p < 0)
567 return 1;
568 if (oc == O_MUL) {
569 iv_drop(1);
570 if (n == 1) {
571 iv_put(ic[iv].a1);
572 return 0;
574 if (n == 0) {
575 o_num(0);
576 return 0;
578 r2 = iv_num(p);
579 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
580 return 0;
582 if (oc == O_DIV && ~bt & T_MSIGN) {
583 iv_drop(1);
584 if (n == 1) {
585 iv_put(ic[iv].a1);
586 return 0;
588 r2 = iv_num(p);
589 ic_put(O_MK(O_SHR, ULNG), ic[iv].a1, r2, 0);
590 return 0;
592 if (oc == O_MOD && ~bt & T_MSIGN) {
593 iv_drop(1);
594 if (n == 1) {
595 o_num(0);
596 return 0;
598 r2 = iv_num(LONGSZ * 8 - p);
599 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
600 r1 = iv_pop();
601 r2 = iv_num(LONGSZ * 8 - p);
602 ic_put(O_MK(O_SHR, ULNG), r1, r2, 0);
603 return 0;
605 return 1;
608 /* optimise comparison */
609 static int io_cmp(void)
611 long iv = iv_get(0);
612 long cmp = ic[iv].a1;
613 if (O_C(ic[iv].op) == O_LNOT && ic[cmp].op & O_CMP) {
614 iv_drop(1);
615 ic[cmp].op ^= 1;
616 iv_put(cmp);
617 return 0;
619 return 1;
622 /* optimise branch instructions after comparison */
623 static int io_jmp(void)
625 struct ic *c = &ic[ic_n - 1];
626 long oc = O_C(c->op);
627 if (oc & O_JZ && O_C(ic[c->a1].op) == O_LNOT) {
628 c->a1 = ic[c->a1].a1;
629 c->op ^= 1;
630 return 0;
632 if (oc & O_JZ && O_C(ic[c->a1].op) & O_CMP) {
633 long cop = (ic[c->a1].op & ~O_CMP) | O_JCC;
634 c->op = O_C(c->op) == O_JZ ? cop ^ 1 : cop;
635 c->a2 = ic[c->a1].a2;
636 c->a1 = ic[c->a1].a1;
637 return 0;
639 return 1;
642 /* optimise accessing locals or symbols with an offset */
643 static int io_addr(void)
645 long iv, off;
646 if (ic_off(ic, iv_get(0), &iv, &off) || iv == iv_get(0))
647 return 1;
648 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
649 iv_drop(1);
650 ic_put(O_MOV | O_LOC, ic[iv].a1, ic[iv].a2 + off, 0);
651 return 0;
653 if (ic[iv].op & O_MOV && ic[iv].op & O_SYM) {
654 iv_drop(1);
655 ic_put(O_MOV | O_SYM, ic[iv].a1, ic[iv].a2 + off, 0);
656 return 0;
658 return 1;
661 /* optimise loading and storing locals */
662 static int io_loc(void)
664 struct ic *c = &ic[ic_n - 1];
665 long iv, off;
666 if (c->op & O_LD && c->op & O_NUM) {
667 if (ic_off(ic, c->a1, &iv, &off))
668 return 1;
669 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
670 c->op = (c->op & ~O_NUM) | O_LOC;
671 c->a1 = ic[iv].a1;
672 c->a2 += ic[iv].a2 + off;
673 return 0;
675 c->a1 = iv;
676 c->a2 += off;
677 return 0;
679 if (c->op & O_ST && c->op & O_NUM) {
680 if (ic_off(ic, c->a2, &iv, &off))
681 return 1;
682 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
683 c->op = (c->op & ~O_NUM) | O_LOC;
684 c->a2 = ic[iv].a1;
685 c->a3 += ic[iv].a2 + off;
686 return 0;
688 c->a2 = iv;
689 c->a3 += off;
690 return 0;
692 return 1;
695 static int imm_ok(long op, long n, int arg)
697 long m0, m1, m2, m3, mt;
698 if (i_reg(op | O_NUM, &m0, &m1, &m2, &m3, &mt))
699 return 0;
700 return i_imm(arg == 2 ? m2 : m1, n);
703 /* use instruction immediates */
704 static int io_imm(void)
706 struct ic *c = &ic[ic_n - 1];
707 long oc = O_C(c->op);
708 long n;
709 if (oc & (O_NUM | O_LOC | O_SYM))
710 return 1;
711 if (oc == O_ADD || oc == O_MUL || oc == O_AND || oc == O_OR ||
712 oc == O_XOR || oc == O_EQ || oc == O_NE) {
713 if (!ic_num(ic, c->a1, &n)) {
714 long t = c->a1;
715 c->a1 = c->a2;
716 c->a2 = t;
719 if (oc == O_LT || oc == O_GE || oc == O_LE || oc == O_GT) {
720 if (!ic_num(ic, c->a1, &n)) {
721 int t = c->a1;
722 c->a1 = c->a2;
723 c->a2 = t;
724 c->op ^= 1;
727 if (oc & O_JCC && !ic_num(ic, c->a1, &n)) {
728 int t = c->a1;
729 c->a1 = c->a2;
730 c->a2 = t;
731 c->op ^= 1;
733 if (oc & O_JCC && !ic_num(ic, c->a2, &n) && imm_ok(c->op, n, 1)) {
734 c->op |= O_NUM;
735 c->a2 = n;
736 return 0;
738 if (!(oc & O_BOP) || ic_num(ic, c->a2, &n))
739 return 1;
740 if ((oc == O_ADD || oc == O_SUB || oc & O_SHL) && n == 0) {
741 iv_drop(1);
742 iv_put(c->a1);
743 return 0;
745 if (imm_ok(c->op, n, 2)) {
746 c->op |= O_NUM;
747 c->a2 = n;
748 return 0;
750 return 1;
753 /* calling symbols */
754 static int io_call(void)
756 struct ic *c = &ic[ic_n - 1];
757 long sym, off;
758 if (c->op & O_CALL && !ic_sym(ic, c->a1, &sym, &off) && !off) {
759 c->op |= O_SYM;
760 c->a1 = sym;
761 return 0;
763 return 1;
766 /* remove dead code */
767 static void io_deadcode(void)
769 char *live;
770 long *nidx;
771 long src = 0, dst = 0;
772 int i, j;
773 /* liveness analysis */
774 live = calloc(ic_n, sizeof(live[0]));
775 for (i = ic_n - 1; i >= 0; i--) {
776 int n = ic_regcnt(ic + i);
777 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
778 live[i] = 1;
779 if (!live[i])
780 continue;
781 if (n >= 1)
782 live[ic[i].a1] = 1;
783 if (n >= 2)
784 live[ic[i].a2] = 1;
785 if (n >= 3)
786 live[ic[i].a3] = 1;
787 if (ic[i].op & O_CALL)
788 for (j = 0; j < ic[i].a3; j++)
789 live[ic[i].args[j]] = 1;
791 /* the new indices of intermediate instructions */
792 nidx = calloc(ic_n, sizeof(nidx[0]));
793 while (src < ic_n) {
794 while (src < ic_n && !live[src]) {
795 nidx[src] = dst;
796 ic_free(&ic[src++]);
798 if (src < ic_n) {
799 nidx[src] = dst;
800 if (src != dst)
801 memcpy(ic + dst, ic + src, sizeof(ic[src]));
802 src++;
803 dst++;
806 ic_n = dst;
807 /* adjusting arguments and branch targets */
808 for (i = 0; i < ic_n; i++) {
809 int n = ic_regcnt(ic + i);
810 if (n >= 1)
811 ic[i].a1 = nidx[ic[i].a1];
812 if (n >= 2)
813 ic[i].a2 = nidx[ic[i].a2];
814 if (n >= 3)
815 ic[i].a3 = nidx[ic[i].a3];
816 if (ic[i].op & O_JXX)
817 ic[i].a3 = nidx[ic[i].a3];
818 if (ic[i].op & O_CALL)
819 for (j = 0; j < ic[i].a3; j++)
820 ic[i].args[j] = nidx[ic[i].args[j]];
822 free(live);
823 free(nidx);