x86: use push instruction for saving registers
[neatcc.git] / int.c
blobabd964329f92f7f78c4e626008cbe51903edd4f4
1 /* neatcc intermediate code generation */
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "ncc.h"
7 static struct ic *ic; /* intermediate code */
8 static long ic_n, ic_sz; /* number of instructions in ic[] */
9 static long iv[NTMPS]; /* operand stack */
10 static long iv_n; /* number of values in iv[] */
11 static long *lab_loc; /* label locations */
12 static long lab_n, lab_sz; /* number of labels in lab_loc[] */
13 static long lab_last; /* the last label target */
15 static int io_num(void);
16 static int io_mul2(void);
17 static int io_cmp(void);
18 static int io_jmp(void);
19 static int io_addr(void);
20 static int io_loc(void);
21 static int io_imm(void);
22 static int io_call(void);
23 static void io_deadcode(void);
25 static void iv_put(long n);
27 static struct ic *ic_put(long op, long arg1, long arg2, long arg3)
29 struct ic *c;
30 if (ic_n == ic_sz) {
31 ic_sz = MAX(128, ic_sz * 2);
32 ic = mextend(ic, ic_n, ic_sz, sizeof(*ic));
34 c = &ic[ic_n++];
35 c->op = op;
36 c->a1 = arg1;
37 c->a2 = arg2;
38 c->a3 = arg3;
39 if (op & O_OUT)
40 iv_put(ic_n - 1);
41 return c;
44 static void ic_back(long pos)
46 int i;
47 for (i = pos; i < ic_n; i++)
48 if (ic[i].op & O_CALL)
49 free(ic[i].args);
50 ic_n = pos;
53 static long iv_pop(void)
55 return iv[--iv_n];
58 static long iv_get(int n)
60 return iv[iv_n - n - 1];
63 static void iv_put(long n)
65 iv[iv_n++] = n;
68 static void iv_drop(int n)
70 iv_n = MAX(0, iv_n - n);
73 static void iv_swap(int x, int y)
75 long v = iv[iv_n - x - 1];
76 iv[iv_n - x - 1] = iv[iv_n - y - 1];
77 iv[iv_n - y - 1] = v;
80 static void iv_dup(void)
82 iv[iv_n] = iv[iv_n - 1];
83 iv_n++;
86 void o_num(long n)
88 ic_put(O_MOV | O_NUM, n, 0, 0);
91 void o_local(long id)
93 ic_put(O_MOV | O_LOC, id, 0, 0);
96 void o_sym(char *sym)
98 ic_put(O_MOV | O_SYM, out_sym(sym), 0, 0);
101 void o_tmpdrop(int n)
103 iv_drop(n >= 0 ? n : iv_n);
106 void o_tmpswap(void)
108 iv_swap(0, 1);
111 void o_tmpcopy(void)
113 iv_dup();
116 /* return one if the given value is constant */
117 static int ic_const(long iv)
119 long oc = O_C(ic[iv].op);
120 return oc & O_MOV && oc & (O_NUM | O_SYM | O_LOC);
123 /* return one if the given value is a simple load */
124 static int ic_load(long iv)
126 long oc = O_C(ic[iv].op);
127 int i;
128 if (oc & O_LD && oc & (O_NUM | O_SYM | O_LOC)) {
129 for (i = iv + 1; i < ic_n; i++)
130 if (ic[i].op & O_ST)
131 return 0;
132 return 1;
134 return 0;
137 void o_bop(long op)
139 int r1 = iv_pop();
140 int r2 = iv_pop();
141 /* load constants as late as possible */
142 if (opt(1) && ic_const(r2) && !ic_const(r1)) {
143 ic_put(ic[r2].op, ic[r2].a1, ic[r2].a2, ic[r2].a3);
144 r2 = iv_pop();
146 ic_put(op, r2, r1, 0);
147 if (opt(1))
148 io_num() && io_mul2() && io_addr() && io_imm();
151 void o_uop(long op)
153 int r1 = iv_pop();
154 ic_put(op, r1, 0, 0);
155 if (opt(1))
156 io_num() && io_cmp();
159 void o_assign(long bt)
161 int rv = iv_pop();
162 int lv = iv_pop();
163 /* load constants as late as possible */
164 if (opt(1) && (ic_const(lv) || ic_load(lv))) {
165 ic_put(ic[lv].op, ic[lv].a1, ic[lv].a2, ic[lv].a3);
166 lv = iv_pop();
168 ic_put(O_MK(O_ST | O_NUM, bt), rv, lv, 0);
169 iv_put(rv);
170 if (opt(1))
171 io_loc();
174 void o_deref(long bt)
176 int r1 = iv_pop();
177 ic_put(O_MK(O_LD | O_NUM, bt), r1, 0, 0);
178 if (opt(1))
179 io_loc();
182 void o_cast(long bt)
184 if (T_SZ(bt) != ULNG) {
185 int r1 = iv_pop();
186 ic_put(O_MK(O_MOV, bt), r1, 0, 0);
187 if (opt(1))
188 io_num();
192 void o_memcpy(void)
194 int r2 = iv_pop();
195 int r1 = iv_pop();
196 int r0 = iv_pop();
197 ic_put(O_MCPY, r0, r1, r2);
200 void o_memset(void)
202 int r2 = iv_pop();
203 int r1 = iv_pop();
204 int r0 = iv_pop();
205 ic_put(O_MSET, r0, r1, r2);
208 void o_call(int argc, int ret)
210 struct ic *c;
211 long *args = malloc(argc * sizeof(c->args[0]));
212 int r1, i;
213 for (i = argc - 1; i >= 0; --i)
214 args[i] = iv_pop();
215 for (i = argc - 1; i >= 0; --i) {
216 int iv = args[i];
217 /* load constants as late as possible */
218 if (opt(1) && (ic_const(iv) || ic_load(iv))) {
219 ic_put(ic[iv].op, ic[iv].a1, ic[iv].a2, ic[iv].a3);
220 args[i] = iv_pop();
223 r1 = iv_pop();
224 c = ic_put(O_CALL, r1, 0, argc);
225 c->args = args;
226 iv_drop(ret == 0);
227 if (opt(1))
228 io_call();
231 void o_ret(int ret)
233 if (!ret)
234 o_num(0);
235 ic_put(O_RET, iv_pop(), 0, 0);
238 void o_label(long id)
240 while (id >= lab_sz) {
241 lab_sz = MAX(128, lab_sz * 2);
242 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
244 while (lab_n <= id)
245 lab_loc[lab_n++] = -1;
246 lab_loc[id] = ic_n;
247 lab_last = ic_n;
250 void o_jmp(long id)
252 ic_put(O_JMP, 0, 0, id);
255 void o_jz(long id)
257 ic_put(O_JZ, iv_pop(), 0, id);
258 if (opt(1))
259 io_jmp();
262 int o_popnum(long *n)
264 if (ic_num(ic, iv_get(0), n))
265 return 1;
266 iv_drop(1);
267 return 0;
270 int o_popsym(long *sym, long *off)
272 if (ic_sym(ic, iv_get(0), sym, off))
273 return 1;
274 iv_drop(1);
275 return 0;
278 long o_mark(void)
280 return ic_n;
283 void o_back(long mark)
285 ic_back(mark);
288 void ic_get(struct ic **c, long *n)
290 int i;
291 if (!ic_n || ~ic[ic_n - 1].op & O_RET || lab_last == ic_n)
292 o_ret(0);
293 for (i = 0; i < ic_n; i++) /* filling branch targets */
294 if (ic[i].op & O_JXX)
295 ic[i].a3 = lab_loc[ic[i].a3];
296 io_deadcode(); /* removing dead code */
297 *c = ic;
298 *n = ic_n;
299 ic = NULL;
300 ic_n = 0;
301 ic_sz = 0;
302 iv_n = 0;
303 free(lab_loc);
304 lab_loc = NULL;
305 lab_n = 0;
306 lab_sz = 0;
307 lab_last = 0;
310 void ic_free(struct ic *ic)
312 if (ic->op & O_CALL)
313 free(ic->args);
316 /* intermediate code queries */
318 static long cb(long op, long a, long b)
320 switch (O_C(op)) {
321 case O_ADD:
322 return a + b;
323 case O_SUB:
324 return a - b;
325 case O_AND:
326 return a & b;
327 case O_OR:
328 return a | b;
329 case O_XOR:
330 return a ^ b;
331 case O_MUL:
332 return a * b;
333 case O_DIV:
334 return a / b;
335 case O_MOD:
336 return a % b;
337 case O_SHL:
338 return a << b;
339 case O_SHR:
340 return O_T(op) & T_MSIGN ? a >> b : (unsigned long) a >> b;
341 case O_LT:
342 return a < b;
343 case O_GT:
344 return a > b;
345 case O_LE:
346 return a <= b;
347 case O_GE:
348 return a >= b;
349 case O_EQ:
350 return a == b;
351 case O_NE:
352 return a != b;
354 return 0;
357 static long cu(int op, long i)
359 switch (O_C(op)) {
360 case O_NEG:
361 return -i;
362 case O_NOT:
363 return ~i;
364 case O_LNOT:
365 return !i;
367 return 0;
370 static long c_cast(long n, unsigned bt)
372 if (!(bt & T_MSIGN) && T_SZ(bt) != ULNG)
373 n &= ((1l << (long) (T_SZ(bt) * 8)) - 1);
374 if (bt & T_MSIGN && T_SZ(bt) != ULNG &&
375 n > (1l << (T_SZ(bt) * 8 - 1)))
376 n = -((1l << (T_SZ(bt) * 8)) - n);
377 return n;
380 int ic_num(struct ic *ic, long iv, long *n)
382 long n1, n2;
383 long oc = O_C(ic[iv].op);
384 long bt = O_T(ic[iv].op);
385 if (oc & O_MOV && oc & O_NUM) {
386 *n = ic[iv].a1;
387 return 0;
389 if (oc & O_BOP) {
390 if (ic_num(ic, ic[iv].a1, &n1))
391 return 1;
392 if (ic_num(ic, ic[iv].a2, &n2))
393 return 1;
394 *n = cb(ic[iv].op, n1, n2);
395 return 0;
397 if (oc & O_UOP) {
398 if (ic_num(ic, ic[iv].a1, &n1))
399 return 1;
400 *n = cu(ic[iv].op, n1);
401 return 0;
403 if (oc & O_MOV && !(oc & (O_NUM | O_LOC | O_SYM))) {
404 if (ic_num(ic, ic[iv].a1, &n1))
405 return 1;
406 *n = c_cast(n1, bt);
407 return 0;
409 return 1;
412 int ic_sym(struct ic *ic, long iv, long *sym, long *off)
414 long n;
415 long oc = O_C(ic[iv].op);
416 if (oc & O_MOV && oc & O_SYM) {
417 *sym = ic[iv].a1;
418 *off = ic[iv].a2;
419 return 0;
421 if (oc == O_ADD) {
422 if ((ic_sym(ic, ic[iv].a1, sym, off) ||
423 ic_num(ic, ic[iv].a2, &n)) &&
424 (ic_sym(ic, ic[iv].a2, sym, off) ||
425 ic_num(ic, ic[iv].a1, &n)))
426 return 1;
427 *off += n;
428 return 0;
430 if (oc == O_SUB) {
431 if (ic_sym(ic, ic[iv].a1, sym, off) ||
432 ic_num(ic, ic[iv].a2, &n))
433 return 1;
434 *off -= n;
435 return 0;
437 return 1;
440 static int ic_off(struct ic *ic, long iv, long *base_iv, long *off)
442 long n;
443 long oc = O_C(ic[iv].op);
444 if (oc != O_ADD && oc != O_SUB) {
445 *base_iv = iv;
446 *off = 0;
447 return 0;
449 if (oc == O_ADD) {
450 if ((ic_off(ic, ic[iv].a1, base_iv, off) ||
451 ic_num(ic, ic[iv].a2, &n)) &&
452 (ic_off(ic, ic[iv].a2, base_iv, off) ||
453 ic_num(ic, ic[iv].a1, &n)))
454 return 1;
455 *off += n;
456 return 0;
458 if (oc == O_SUB) {
459 if (ic_off(ic, ic[iv].a1, base_iv, off) ||
460 ic_num(ic, ic[iv].a2, &n))
461 return 1;
462 *off -= n;
463 return 0;
465 return 1;
468 /* number of register arguments */
469 int ic_regcnt(struct ic *ic)
471 long o = ic->op;
472 if (o & O_BOP)
473 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
474 if (o & O_UOP)
475 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
476 if (o & O_CALL)
477 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
478 if (o & O_MOV)
479 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
480 if (o & O_MEM)
481 return 3;
482 if (o & O_JMP)
483 return 0;
484 if (o & O_JZ)
485 return 1;
486 if (o & O_JCC)
487 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
488 if (o & O_RET)
489 return 1;
490 if (o & O_LD)
491 return ((o & O_NUM) != 0);
492 if (o & O_ST)
493 return 1 + ((o & O_NUM) != 0);
494 return 0;
498 * The returned array indicates the last instruction in
499 * which the value produced by each instruction is used.
501 long *ic_lastuse(struct ic *ic, long ic_n)
503 long *luse = calloc(ic_n, sizeof(luse[0]));
504 int i, j;
505 for (i = ic_n - 1; i >= 0; --i) {
506 int n = ic_regcnt(ic + i);
507 if (!luse[i])
508 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
509 luse[i] = -1;
510 if (!luse[i])
511 continue;
512 if (n >= 1 && !luse[ic[i].a1])
513 luse[ic[i].a1] = i;
514 if (n >= 2 && !luse[ic[i].a2])
515 luse[ic[i].a2] = i;
516 if (n >= 3 && !luse[ic[i].a3])
517 luse[ic[i].a3] = i;
518 if (ic[i].op & O_CALL)
519 for (j = 0; j < ic[i].a3; j++)
520 if (!luse[ic[i].args[j]])
521 luse[ic[i].args[j]] = i;
523 return luse;
526 /* intermediate code optimisations */
528 /* constant folding */
529 static int io_num(void)
531 long n;
532 if (!ic_num(ic, iv_get(0), &n)) {
533 iv_drop(1);
534 o_num(n);
535 return 0;
537 return 1;
540 static int log2a(unsigned long n)
542 int i = 0;
543 for (i = 0; i < LONGSZ * 8; i++)
544 if (n & (1u << i))
545 break;
546 if (i == LONGSZ * 8 || !(n >> (i + 1)))
547 return i;
548 return -1;
551 static long iv_num(long n)
553 o_num(n);
554 return iv_pop();
557 /* optimised multiplication operations for powers of two */
558 static int io_mul2(void)
560 long iv = iv_get(0);
561 long n, p;
562 long r1, r2;
563 long oc = O_C(ic[iv].op);
564 long bt = O_T(ic[iv].op);
565 if (!(oc & O_MUL))
566 return 1;
567 if (oc == O_MUL && !ic_num(ic, ic[iv].a1, &n)) {
568 long t = ic[iv].a1;
569 ic[iv].a1 = ic[iv].a2;
570 ic[iv].a2 = t;
572 if (ic_num(ic, ic[iv].a2, &n))
573 return 1;
574 p = log2a(n);
575 if (n && p < 0)
576 return 1;
577 if (oc == O_MUL) {
578 iv_drop(1);
579 if (n == 1) {
580 iv_put(ic[iv].a1);
581 return 0;
583 if (n == 0) {
584 o_num(0);
585 return 0;
587 r2 = iv_num(p);
588 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
589 return 0;
591 if (oc == O_DIV && ~bt & T_MSIGN) {
592 iv_drop(1);
593 if (n == 1) {
594 iv_put(ic[iv].a1);
595 return 0;
597 r2 = iv_num(p);
598 ic_put(O_MK(O_SHR, ULNG), ic[iv].a1, r2, 0);
599 return 0;
601 if (oc == O_MOD && ~bt & T_MSIGN) {
602 iv_drop(1);
603 if (n == 1) {
604 o_num(0);
605 return 0;
607 r2 = iv_num(LONGSZ * 8 - p);
608 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
609 r1 = iv_pop();
610 r2 = iv_num(LONGSZ * 8 - p);
611 ic_put(O_MK(O_SHR, ULNG), r1, r2, 0);
612 return 0;
614 return 1;
617 /* optimise comparison */
618 static int io_cmp(void)
620 long iv = iv_get(0);
621 long cmp = ic[iv].a1;
622 if (O_C(ic[iv].op) == O_LNOT && ic[cmp].op & O_CMP) {
623 iv_drop(1);
624 ic[cmp].op ^= 1;
625 iv_put(cmp);
626 return 0;
628 return 1;
631 /* optimise branch instructions after comparison */
632 static int io_jmp(void)
634 struct ic *c = &ic[ic_n - 1];
635 long oc = O_C(c->op);
636 if (oc & O_JZ && O_C(ic[c->a1].op) == O_LNOT) {
637 c->a1 = ic[c->a1].a1;
638 c->op ^= 1;
639 return 0;
641 if (oc & O_JZ && O_C(ic[c->a1].op) & O_CMP) {
642 long cop = (ic[c->a1].op & ~O_CMP) | O_JCC;
643 c->op = O_C(c->op) == O_JZ ? cop ^ 1 : cop;
644 c->a2 = ic[c->a1].a2;
645 c->a1 = ic[c->a1].a1;
646 return 0;
648 return 1;
651 /* optimise accessing locals or symbols with an offset */
652 static int io_addr(void)
654 long iv, off;
655 if (ic_off(ic, iv_get(0), &iv, &off) || iv == iv_get(0))
656 return 1;
657 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
658 iv_drop(1);
659 ic_put(O_MOV | O_LOC, ic[iv].a1, ic[iv].a2 + off, 0);
660 return 0;
662 if (ic[iv].op & O_MOV && ic[iv].op & O_SYM) {
663 iv_drop(1);
664 ic_put(O_MOV | O_SYM, ic[iv].a1, ic[iv].a2 + off, 0);
665 return 0;
667 return 1;
670 /* optimise loading and storing locals */
671 static int io_loc(void)
673 struct ic *c = &ic[ic_n - 1];
674 long iv, off;
675 if (c->op & O_LD && c->op & O_NUM) {
676 if (ic_off(ic, c->a1, &iv, &off))
677 return 1;
678 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
679 c->op = (c->op & ~O_NUM) | O_LOC;
680 c->a1 = ic[iv].a1;
681 c->a2 += ic[iv].a2 + off;
682 return 0;
684 c->a1 = iv;
685 c->a2 += off;
686 return 0;
688 if (c->op & O_ST && c->op & O_NUM) {
689 if (ic_off(ic, c->a2, &iv, &off))
690 return 1;
691 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
692 c->op = (c->op & ~O_NUM) | O_LOC;
693 c->a2 = ic[iv].a1;
694 c->a3 += ic[iv].a2 + off;
695 return 0;
697 c->a2 = iv;
698 c->a3 += off;
699 return 0;
701 return 1;
704 static int imm_ok(long op, long n, int arg)
706 long m0, m1, m2, m3, mt;
707 if (i_reg(op | O_NUM, &m0, &m1, &m2, &m3, &mt))
708 return 0;
709 return i_imm(arg == 2 ? m2 : m1, n);
712 /* use instruction immediates */
713 static int io_imm(void)
715 struct ic *c = &ic[ic_n - 1];
716 long oc = O_C(c->op);
717 long n;
718 if (oc & (O_NUM | O_LOC | O_SYM))
719 return 1;
720 if (oc == O_ADD || oc == O_MUL || oc == O_AND || oc == O_OR ||
721 oc == O_XOR || oc == O_EQ || oc == O_NE) {
722 if (!ic_num(ic, c->a1, &n)) {
723 long t = c->a1;
724 c->a1 = c->a2;
725 c->a2 = t;
728 if (oc == O_LT || oc == O_GE || oc == O_LE || oc == O_GT) {
729 if (!ic_num(ic, c->a1, &n)) {
730 int t = c->a1;
731 c->a1 = c->a2;
732 c->a2 = t;
733 c->op ^= 1;
736 if (oc & O_JCC && !ic_num(ic, c->a1, &n)) {
737 int t = c->a1;
738 c->a1 = c->a2;
739 c->a2 = t;
740 c->op ^= 1;
742 if (oc & O_JCC && !ic_num(ic, c->a2, &n) && imm_ok(c->op, n, 1)) {
743 c->op |= O_NUM;
744 c->a2 = n;
745 return 0;
747 if (!(oc & O_BOP) || ic_num(ic, c->a2, &n))
748 return 1;
749 if ((oc == O_ADD || oc == O_SUB || oc & O_SHL) && n == 0) {
750 iv_drop(1);
751 iv_put(c->a1);
752 return 0;
754 if (imm_ok(c->op, n, 2)) {
755 c->op |= O_NUM;
756 c->a2 = n;
757 return 0;
759 return 1;
762 /* calling symbols */
763 static int io_call(void)
765 struct ic *c = &ic[ic_n - 1];
766 long sym, off;
767 if (c->op & O_CALL && !ic_sym(ic, c->a1, &sym, &off) && !off) {
768 c->op |= O_SYM;
769 c->a1 = sym;
770 return 0;
772 return 1;
775 /* remove dead code */
776 static void io_deadcode(void)
778 char *live;
779 long *nidx;
780 long src = 0, dst = 0;
781 int i, j;
782 /* liveness analysis */
783 live = calloc(ic_n, sizeof(live[0]));
784 for (i = ic_n - 1; i >= 0; i--) {
785 int n = ic_regcnt(ic + i);
786 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
787 live[i] = 1;
788 if (!live[i])
789 continue;
790 if (n >= 1)
791 live[ic[i].a1] = 1;
792 if (n >= 2)
793 live[ic[i].a2] = 1;
794 if (n >= 3)
795 live[ic[i].a3] = 1;
796 if (ic[i].op & O_CALL)
797 for (j = 0; j < ic[i].a3; j++)
798 live[ic[i].args[j]] = 1;
800 /* the new indices of intermediate instructions */
801 nidx = calloc(ic_n, sizeof(nidx[0]));
802 while (src < ic_n) {
803 while (src < ic_n && !live[src]) {
804 nidx[src] = dst;
805 ic_free(&ic[src++]);
807 if (src < ic_n) {
808 nidx[src] = dst;
809 if (src != dst)
810 memcpy(ic + dst, ic + src, sizeof(ic[src]));
811 src++;
812 dst++;
815 ic_n = dst;
816 /* adjusting arguments and branch targets */
817 for (i = 0; i < ic_n; i++) {
818 int n = ic_regcnt(ic + i);
819 if (n >= 1)
820 ic[i].a1 = nidx[ic[i].a1];
821 if (n >= 2)
822 ic[i].a2 = nidx[ic[i].a2];
823 if (n >= 3)
824 ic[i].a3 = nidx[ic[i].a3];
825 if (ic[i].op & O_JXX)
826 ic[i].a3 = nidx[ic[i].a3];
827 if (ic[i].op & O_CALL)
828 for (j = 0; j < ic[i].a3; j++)
829 ic[i].args[j] = nidx[ic[i].args[j]];
831 free(live);
832 free(nidx);