int: load constant call arguments as late as possible
[neatcc.git] / int.c
blob7aa8fe4739e0c054c3fe431a2480988f949f639f
1 /* neatcc intermediate code generation */
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "ncc.h"
7 static struct ic *ic; /* intermediate code */
8 static long ic_n, ic_sz; /* number of instructions in ic[] */
9 static long iv[NTMPS]; /* operand stack */
10 static long iv_n; /* number of values in iv[] */
11 static long *lab_loc; /* label locations */
12 static long lab_n, lab_sz; /* number of labels in lab_loc[] */
13 static long lab_last; /* the last label target */
15 static int io_num(void);
16 static int io_mul2(void);
17 static int io_cmp(void);
18 static int io_jmp(void);
19 static int io_addr(void);
20 static int io_loc(void);
21 static int io_imm(void);
22 static int io_call(void);
23 static void io_deadcode(void);
25 static struct ic *ic_new(void)
27 if (ic_n == ic_sz) {
28 ic_sz = MAX(128, ic_sz * 2);
29 ic = mextend(ic, ic_n, ic_sz, sizeof(*ic));
31 return &ic[ic_n++];
34 static struct ic *ic_put(long op, long arg0, long arg1, long arg2)
36 struct ic *c = ic_new();
37 c->op = op;
38 c->arg0 = arg0;
39 c->arg1 = arg1;
40 c->arg2 = arg2;
41 return c;
44 static void ic_back(long pos)
46 int i;
47 for (i = pos; i < ic_n; i++)
48 if (ic[i].op & O_CALL)
49 free(ic[i].args);
50 ic_n = pos;
53 static long iv_pop(void)
55 return iv[--iv_n];
58 static long iv_get(int n)
60 return iv[iv_n - n - 1];
63 static long iv_new(void)
65 iv[iv_n] = ic_n;
66 return iv[iv_n++];
69 static void iv_put(long n)
71 iv[iv_n++] = n;
74 static void iv_drop(int n)
76 iv_n = MAX(0, iv_n - n);
79 static void iv_swap(int x, int y)
81 long v = iv[iv_n - x - 1];
82 iv[iv_n - x - 1] = iv[iv_n - y - 1];
83 iv[iv_n - y - 1] = v;
86 static void iv_dup(void)
88 iv[iv_n] = iv[iv_n - 1];
89 iv_n++;
92 void o_num(long n)
94 ic_put(O_MOV | O_NUM, iv_new(), n, 0);
97 void o_local(long id)
99 ic_put(O_MOV | O_LOC, iv_new(), id, 0);
102 void o_sym(char *sym)
104 ic_put(O_MOV | O_SYM, iv_new(), out_sym(sym), 0);
107 void o_tmpdrop(int n)
109 iv_drop(n >= 0 ? n : iv_n);
112 void o_tmpswap(void)
114 iv_swap(0, 1);
117 void o_tmpcopy(void)
119 iv_dup();
122 /* return one if the given value is constant */
123 static int ic_const(long iv)
125 long oc = O_C(ic[iv].op);
126 return oc & O_MOV && oc & (O_NUM | O_SYM | O_LOC);
129 /* return one if the given value is a simple load */
130 static int ic_load(long iv)
132 long oc = O_C(ic[iv].op);
133 return oc & O_LD && oc & (O_NUM | O_SYM | O_LOC);
136 void o_bop(long op)
138 int r1 = iv_pop();
139 int r2 = iv_pop();
140 if (ic_const(r2) && !ic_const(r1)) { /* load constants last */
141 ic_put(ic[r2].op, iv_new(), ic[r2].arg1, ic[r2].arg2);
142 r2 = iv_pop();
144 ic_put(op, iv_new(), r2, r1);
145 io_num() && io_mul2() && io_addr() && io_imm();
148 void o_uop(long op)
150 int r1 = iv_pop();
151 ic_put(op, iv_new(), r1, 0);
152 if (io_num())
153 io_cmp();
156 void o_assign(long bt)
158 ic_put(O_MK(O_ST | O_NUM, bt), iv_get(0), iv_get(1), 0);
159 iv_swap(0, 1);
160 iv_pop();
161 io_loc();
164 void o_deref(long bt)
166 int r1 = iv_pop();
167 ic_put(O_MK(O_LD | O_NUM, bt), iv_new(), r1, 0);
168 io_loc();
171 void o_cast(long bt)
173 if (T_SZ(bt) != ULNG) {
174 int r1 = iv_pop();
175 ic_put(O_MK(O_MOV, bt), iv_new(), r1, 0);
176 io_num();
180 void o_memcpy(void)
182 int r2 = iv_pop();
183 int r1 = iv_pop();
184 int r0 = iv_pop();
185 ic_put(O_MCPY, r0, r1, r2);
188 void o_memset(void)
190 int r2 = iv_pop();
191 int r1 = iv_pop();
192 int r0 = iv_pop();
193 ic_put(O_MSET, r0, r1, r2);
196 void o_call(int argc, int ret)
198 struct ic *c;
199 long *args = malloc(argc * sizeof(c->args[0]));
200 int r1, i;
201 for (i = argc - 1; i >= 0; --i)
202 args[i] = iv_pop();
203 for (i = argc - 1; i >= 0; --i) {
204 int iv = args[i];
205 if (ic_const(iv) || ic_load(iv)) { /* load constants last */
206 ic_put(ic[iv].op, iv_new(), ic[iv].arg1, ic[iv].arg2);
207 args[i] = iv_pop();
210 r1 = iv_pop();
211 c = ic_put(O_CALL, iv_new(), r1, argc);
212 c->args = args;
213 iv_drop(ret == 0);
214 io_call();
217 void o_ret(int ret)
219 if (!ret)
220 o_num(0);
221 ic_put(O_RET, iv_pop(), 0, 0);
224 void o_label(long id)
226 while (id >= lab_sz) {
227 lab_sz = MAX(128, lab_sz * 2);
228 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
230 while (lab_n <= id)
231 lab_loc[lab_n++] = -1;
232 lab_loc[id] = ic_n;
233 lab_last = ic_n;
236 void o_jmp(long id)
238 ic_put(O_JMP, 0, 0, id);
241 void o_jz(long id)
243 ic_put(O_JZ, iv_pop(), 0, id);
244 io_jmp();
247 int o_popnum(long *n)
249 if (ic_num(ic, iv_get(0), n))
250 return 1;
251 iv_drop(1);
252 return 0;
255 int o_popsym(long *sym, long *off)
257 if (ic_sym(ic, iv_get(0), sym, off))
258 return 1;
259 iv_drop(1);
260 return 0;
263 long o_mark(void)
265 return ic_n;
268 void o_back(long mark)
270 ic_back(mark);
273 void ic_get(struct ic **c, long *n)
275 int i;
276 if (!ic_n || ~ic[ic_n - 1].op & O_RET || lab_last == ic_n)
277 o_ret(0);
278 for (i = 0; i < ic_n; i++) /* filling branch targets */
279 if (ic[i].op & O_JXX)
280 ic[i].arg2 = lab_loc[ic[i].arg2];
281 io_deadcode(); /* removing dead code */
282 *c = ic;
283 *n = ic_n;
284 ic = NULL;
285 ic_n = 0;
286 ic_sz = 0;
287 iv_n = 0;
288 free(lab_loc);
289 lab_loc = NULL;
290 lab_n = 0;
291 lab_sz = 0;
292 lab_last = 0;
295 void ic_free(struct ic *ic)
297 if (ic->op & O_CALL)
298 free(ic->args);
301 /* intermediate code queries */
303 static long cb(long op, long a, long b)
305 switch (O_C(op)) {
306 case O_ADD:
307 return a + b;
308 case O_SUB:
309 return a - b;
310 case O_AND:
311 return a & b;
312 case O_OR:
313 return a | b;
314 case O_XOR:
315 return a ^ b;
316 case O_MUL:
317 return a * b;
318 case O_DIV:
319 return a / b;
320 case O_MOD:
321 return a % b;
322 case O_SHL:
323 return a << b;
324 case O_SHR:
325 return O_T(op) & T_MSIGN ? a >> b : (unsigned long) a >> b;
326 case O_LT:
327 return a < b;
328 case O_GT:
329 return a > b;
330 case O_LE:
331 return a <= b;
332 case O_GE:
333 return a >= b;
334 case O_EQ:
335 return a == b;
336 case O_NE:
337 return a != b;
339 return 0;
342 static long cu(int op, long i)
344 switch (O_C(op)) {
345 case O_NEG:
346 return -i;
347 case O_NOT:
348 return ~i;
349 case O_LNOT:
350 return !i;
352 return 0;
355 static long c_cast(long n, unsigned bt)
357 if (!(bt & T_MSIGN) && T_SZ(bt) != ULNG)
358 n &= ((1l << (long) (T_SZ(bt) * 8)) - 1);
359 if (bt & T_MSIGN && T_SZ(bt) != ULNG &&
360 n > (1l << (T_SZ(bt) * 8 - 1)))
361 n = -((1l << (T_SZ(bt) * 8)) - n);
362 return n;
365 int ic_num(struct ic *ic, long iv, long *n)
367 long n1, n2;
368 long oc = O_C(ic[iv].op);
369 long bt = O_T(ic[iv].op);
370 if (oc & O_MOV && oc & O_NUM) {
371 *n = ic[iv].arg1;
372 return 0;
374 if (oc & O_BOP) {
375 if (ic_num(ic, ic[iv].arg1, &n1))
376 return 1;
377 if (ic_num(ic, ic[iv].arg2, &n2))
378 return 1;
379 *n = cb(ic[iv].op, n1, n2);
380 return 0;
382 if (oc & O_UOP) {
383 if (ic_num(ic, ic[iv].arg1, &n1))
384 return 1;
385 *n = cu(ic[iv].op, n1);
386 return 0;
388 if (oc & O_MOV && !(oc & (O_NUM | O_LOC | O_SYM))) {
389 if (ic_num(ic, ic[iv].arg1, &n1))
390 return 1;
391 *n = c_cast(n1, bt);
392 return 0;
394 return 1;
397 int ic_sym(struct ic *ic, long iv, long *sym, long *off)
399 long n;
400 long oc = O_C(ic[iv].op);
401 if (oc & O_MOV && oc & O_SYM) {
402 *sym = ic[iv].arg1;
403 *off = ic[iv].arg2;
404 return 0;
406 if (oc == O_ADD) {
407 if ((ic_sym(ic, ic[iv].arg1, sym, off) ||
408 ic_num(ic, ic[iv].arg2, &n)) &&
409 (ic_sym(ic, ic[iv].arg2, sym, off) ||
410 ic_num(ic, ic[iv].arg1, &n)))
411 return 1;
412 *off += n;
413 return 0;
415 if (oc == O_SUB) {
416 if (ic_sym(ic, ic[iv].arg1, sym, off) ||
417 ic_num(ic, ic[iv].arg2, &n))
418 return 1;
419 *off -= n;
420 return 0;
422 return 1;
425 static int ic_off(struct ic *ic, long iv, long *base_iv, long *off)
427 long n;
428 long oc = O_C(ic[iv].op);
429 if (oc != O_ADD && oc != O_SUB) {
430 *base_iv = iv;
431 *off = 0;
432 return 0;
434 if (oc == O_ADD) {
435 if ((ic_off(ic, ic[iv].arg1, base_iv, off) ||
436 ic_num(ic, ic[iv].arg2, &n)) &&
437 (ic_off(ic, ic[iv].arg2, base_iv, off) ||
438 ic_num(ic, ic[iv].arg1, &n)))
439 return 1;
440 *off += n;
441 return 0;
443 if (oc == O_SUB) {
444 if (ic_off(ic, ic[iv].arg1, base_iv, off) ||
445 ic_num(ic, ic[iv].arg2, &n))
446 return 1;
447 *off -= n;
448 return 0;
450 return 1;
453 /* number of register arguments */
454 int ic_regcnt(struct ic *ic)
456 long o = ic->op;
457 if (o & O_BOP)
458 return o & (O_NUM | O_SYM | O_LOC) ? 2 : 3;
459 if (o & O_UOP)
460 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
461 if (o & O_CALL)
462 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
463 if (o & O_MOV)
464 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
465 if (o & O_MEM)
466 return 3;
467 if (o & O_JMP)
468 return 0;
469 if (o & O_JZ)
470 return 1;
471 if (o & O_JCC)
472 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
473 if (o & O_RET)
474 return 1;
475 if (o & (O_LD | O_ST) && o & (O_SYM | O_LOC))
476 return 1;
477 if (o & (O_LD | O_ST))
478 return o & O_NUM ? 2 : 3;
479 return 0;
482 /* return the values written to and read from in the given instruction */
483 static void ic_info(struct ic *ic, long **w, long **r1, long **r2, long **r3)
485 long n = ic_regcnt(ic);
486 long o = ic->op & O_OUT;
487 *r1 = NULL;
488 *r2 = NULL;
489 *r3 = NULL;
490 *w = NULL;
491 if (o) {
492 *w = &ic->arg0;
493 *r1 = n >= 2 ? &ic->arg1 : NULL;
494 *r2 = n >= 3 ? &ic->arg2 : NULL;
495 } else {
496 *r1 = n >= 1 ? &ic->arg0 : NULL;
497 *r2 = n >= 2 ? &ic->arg1 : NULL;
498 *r3 = n >= 3 ? &ic->arg2 : NULL;
503 * The returned array indicates the last instruction in
504 * which the value produced by each instruction is used.
506 long *ic_lastuse(struct ic *ic, long ic_n)
508 long *luse = calloc(ic_n, sizeof(luse[0]));
509 int i, j;
510 for (i = ic_n - 1; i >= 0; --i) {
511 long *w, *r1, *r2, *r3;
512 ic_info(ic + i, &w, &r1, &r2, &r3);
513 if (!luse[i])
514 if (!w || ic[i].op & O_CALL)
515 luse[i] = -1;
516 if (!luse[i])
517 continue;
518 if (r1 && !luse[*r1])
519 luse[*r1] = i;
520 if (r2 && !luse[*r2])
521 luse[*r2] = i;
522 if (r3 && !luse[*r3])
523 luse[*r3] = i;
524 if (ic[i].op & O_CALL)
525 for (j = 0; j < ic[i].arg2; j++)
526 if (!luse[ic[i].args[j]])
527 luse[ic[i].args[j]] = i;
529 return luse;
532 /* intermediate code optimisations */
534 /* constant folding */
535 static int io_num(void)
537 long n;
538 if (!ic_num(ic, iv_get(0), &n)) {
539 iv_drop(1);
540 o_num(n);
541 return 0;
543 return 1;
546 static int log2a(unsigned long n)
548 int i = 0;
549 for (i = 0; i < LONGSZ * 8; i++)
550 if (n & (1u << i))
551 break;
552 if (i == LONGSZ * 8 || !(n >> (i + 1)))
553 return i;
554 return -1;
557 static long iv_num(long n)
559 o_num(n);
560 return iv_pop();
563 /* optimised multiplication operations for powers of two */
564 static int io_mul2(void)
566 long iv = iv_get(0);
567 long n, p;
568 long r1, r2;
569 long oc = O_C(ic[iv].op);
570 long bt = O_T(ic[iv].op);
571 if (!(oc & O_MUL))
572 return 1;
573 if (oc == O_MUL && !ic_num(ic, ic[iv].arg1, &n)) {
574 long t = ic[iv].arg1;
575 ic[iv].arg1 = ic[iv].arg2;
576 ic[iv].arg2 = t;
578 if (ic_num(ic, ic[iv].arg2, &n))
579 return 1;
580 p = log2a(n);
581 if (n && p < 0)
582 return 1;
583 if (oc == O_MUL) {
584 iv_drop(1);
585 if (n == 1) {
586 iv_put(ic[iv].arg1);
587 return 0;
589 if (n == 0) {
590 o_num(0);
591 return 0;
593 r2 = iv_num(p);
594 ic_put(O_MK(O_SHL, ULNG), iv_new(), ic[iv].arg1, r2);
595 return 0;
597 if (oc == O_DIV && ~bt & T_MSIGN) {
598 iv_drop(1);
599 if (n == 1) {
600 iv_put(ic[iv].arg1);
601 return 0;
603 r2 = iv_num(p);
604 ic_put(O_MK(O_SHR, ULNG), iv_new(), ic[iv].arg1, r2);
605 return 0;
607 if (oc == O_MOD && ~bt & T_MSIGN) {
608 iv_drop(1);
609 if (n == 1) {
610 o_num(0);
611 return 0;
613 r2 = iv_num(LONGSZ * 8 - p);
614 ic_put(O_MK(O_SHL, ULNG), iv_new(), ic[iv].arg1, r2);
615 r1 = iv_pop();
616 r2 = iv_num(LONGSZ * 8 - p);
617 ic_put(O_MK(O_SHR, ULNG), iv_new(), r1, r2);
618 return 0;
620 return 1;
623 /* optimise comparison */
624 static int io_cmp(void)
626 long iv = iv_get(0);
627 long cmp = ic[iv].arg1;
628 if (O_C(ic[iv].op) == O_LNOT && ic[cmp].op & O_CMP) {
629 iv_drop(1);
630 ic[cmp].op ^= 1;
631 iv_put(ic[cmp].arg0);
632 return 0;
634 return 1;
637 /* optimise branch instructions after comparison */
638 static int io_jmp(void)
640 struct ic *c = &ic[ic_n - 1];
641 long oc = O_C(c->op);
642 if (oc & O_JZ && O_C(ic[c->arg0].op) == O_LNOT) {
643 c->arg0 = ic[c->arg0].arg1;
644 c->op ^= 1;
645 return 0;
647 if (oc & O_JZ && O_C(ic[c->arg0].op) & O_CMP) {
648 long cop = (ic[c->arg0].op & ~O_CMP) | O_JCC;
649 c->op = O_C(c->op) == O_JZ ? cop ^ 1 : cop;
650 c->arg1 = ic[c->arg0].arg2;
651 c->arg0 = ic[c->arg0].arg1;
652 return 0;
654 return 1;
657 /* optimise accessing locals or symbols with an offset */
658 static int io_addr(void)
660 long iv, off;
661 if (ic_off(ic, iv_get(0), &iv, &off) || iv == iv_get(0))
662 return 1;
663 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
664 iv_drop(1);
665 ic_put(O_MOV | O_LOC, iv_new(), ic[iv].arg1, ic[iv].arg2 + off);
666 return 0;
668 if (ic[iv].op & O_MOV && ic[iv].op & O_SYM) {
669 iv_drop(1);
670 ic_put(O_MOV | O_SYM, iv_new(), ic[iv].arg1, ic[iv].arg2 + off);
671 return 0;
673 return 1;
676 /* optimise loading and storing locals */
677 static int io_loc(void)
679 struct ic *c = &ic[ic_n - 1];
680 long iv, off;
681 if (!(c->op & (O_LD | O_ST)) || !(c->op & O_NUM))
682 return 1;
683 if (ic_off(ic, c->arg1, &iv, &off))
684 return 1;
685 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
686 c->op = (c->op & ~O_NUM) | O_LOC;
687 c->arg1 = ic[iv].arg1;
688 c->arg2 += ic[iv].arg2 + off;
689 return 0;
691 c->arg1 = iv;
692 c->arg2 += off;
693 return 0;
696 static int imm_ok(long op, long n, int arg)
698 long m0, m1, m2, mt;
699 if (i_reg(op | O_NUM, &m0, &m1, &m2, &mt))
700 return 0;
701 return i_imm(arg == 2 ? m2 : m1, n);
704 /* use instruction immediates */
705 static int io_imm(void)
707 struct ic *c = &ic[ic_n - 1];
708 long oc = O_C(c->op);
709 long n;
710 if (oc & (O_NUM | O_LOC | O_SYM))
711 return 1;
712 if (oc == O_ADD || oc == O_MUL || oc == O_AND || oc == O_OR ||
713 oc == O_XOR || oc == O_EQ || oc == O_NE) {
714 if (!ic_num(ic, c->arg1, &n)) {
715 long t = c->arg1;
716 c->arg1 = c->arg2;
717 c->arg2 = t;
720 if (oc == O_LT || oc == O_GE || oc == O_LE || oc == O_GT) {
721 if (!ic_num(ic, c->arg1, &n)) {
722 int t = c->arg1;
723 c->arg1 = c->arg2;
724 c->arg2 = t;
725 c->op ^= 1;
728 if (oc & O_JCC && !ic_num(ic, c->arg0, &n)) {
729 int t = c->arg0;
730 c->arg0 = c->arg1;
731 c->arg1 = t;
732 c->op ^= 1;
734 if (oc & O_JCC && !ic_num(ic, c->arg1, &n) && imm_ok(c->op, n, 1)) {
735 c->op |= O_NUM;
736 c->arg1 = n;
737 return 0;
739 if (!(oc & O_BOP) || ic_num(ic, c->arg2, &n))
740 return 1;
741 if ((oc == O_ADD || oc == O_SUB || oc & O_SHL) && n == 0) {
742 iv_drop(1);
743 iv_put(c->arg1);
744 return 0;
746 if (imm_ok(c->op, n, 2)) {
747 c->op |= O_NUM;
748 c->arg2 = n;
749 return 0;
751 return 1;
754 /* calling symbols */
755 static int io_call(void)
757 struct ic *c = &ic[ic_n - 1];
758 long sym, off;
759 if (c->op & O_CALL && !ic_sym(ic, c->arg1, &sym, &off) && !off) {
760 c->op |= O_SYM;
761 c->arg1 = sym;
762 return 0;
764 return 1;
767 /* remove dead code */
768 static void io_deadcode(void)
770 char *live;
771 long *nidx;
772 long src = 0, dst = 0;
773 int i, j;
774 /* liveness analysis */
775 live = calloc(ic_n, sizeof(live[0]));
776 for (i = ic_n - 1; i >= 0; i--) {
777 long *w, *r1, *r2, *r3;
778 ic_info(ic + i, &w, &r1, &r2, &r3);
779 if (!w || ic[i].op & O_CALL)
780 live[i] = 1;
781 if (!live[i])
782 continue;
783 if (r1)
784 live[*r1] = 1;
785 if (r2)
786 live[*r2] = 1;
787 if (r3)
788 live[*r3] = 1;
789 if (ic[i].op & O_CALL)
790 for (j = 0; j < ic[i].arg2; j++)
791 live[ic[i].args[j]] = 1;
793 /* the new indices of intermediate instructions */
794 nidx = calloc(ic_n, sizeof(nidx[0]));
795 while (src < ic_n) {
796 while (src < ic_n && !live[src]) {
797 nidx[src] = dst;
798 ic_free(&ic[src++]);
800 if (src < ic_n) {
801 nidx[src] = dst;
802 if (src != dst)
803 memcpy(ic + dst, ic + src, sizeof(ic[src]));
804 src++;
805 dst++;
808 ic_n = dst;
809 /* adjusting arguments and branch targets */
810 for (i = 0; i < ic_n; i++) {
811 long *w, *r1, *r2, *r3;
812 ic_info(ic + i, &w, &r1, &r2, &r3);
813 if (r1)
814 *r1 = nidx[*r1];
815 if (r2)
816 *r2 = nidx[*r2];
817 if (r3)
818 *r3 = nidx[*r3];
819 if (w)
820 *w = nidx[*w];
821 if (ic[i].op & O_JXX)
822 ic[i].arg2 = nidx[ic[i].arg2];
823 if (ic[i].op & O_CALL)
824 for (j = 0; j < ic[i].arg2; j++)
825 ic[i].args[j] = nidx[ic[i].args[j]];
827 free(live);
828 free(nidx);