x64: check immediates for O_LD and O_ST
[neatcc.git] / int.c
blob3b288c69807a293c7bef57b27608cb861460ec92
1 /* neatcc intermediate code generation */
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "ncc.h"
7 static struct ic *ic; /* intermediate code */
8 static long ic_n, ic_sz; /* number of instructions in ic[] */
9 static long iv[NTMPS]; /* operand stack */
10 static long iv_n; /* number of values in iv[] */
11 static long *lab_loc; /* label locations */
12 static long lab_n, lab_sz; /* number of labels in lab_loc[] */
13 static long lab_last; /* the last label target */
15 static int io_num(void);
16 static int io_mul2(void);
17 static int io_cmp(void);
18 static int io_jmp(void);
19 static int io_addr(void);
20 static int io_loc(void);
21 static int io_imm(void);
22 static int io_call(void);
23 static void io_deadcode(void);
25 static void iv_put(long n);
27 static struct ic *ic_put(long op, long arg1, long arg2, long arg3)
29 struct ic *c;
30 if (ic_n == ic_sz) {
31 ic_sz = MAX(128, ic_sz * 2);
32 ic = mextend(ic, ic_n, ic_sz, sizeof(*ic));
34 c = &ic[ic_n++];
35 c->op = op;
36 c->a1 = arg1;
37 c->a2 = arg2;
38 c->a3 = arg3;
39 if (op & O_OUT)
40 iv_put(ic_n - 1);
41 return c;
44 static void ic_back(long pos)
46 int i;
47 for (i = pos; i < ic_n; i++)
48 if (ic[i].op & O_CALL)
49 free(ic[i].args);
50 ic_n = pos;
53 static long iv_pop(void)
55 return iv[--iv_n];
58 static long iv_get(int n)
60 return iv[iv_n - n - 1];
63 static void iv_put(long n)
65 iv[iv_n++] = n;
68 static void iv_drop(int n)
70 iv_n = MAX(0, iv_n - n);
73 static void iv_swap(int x, int y)
75 long v = iv[iv_n - x - 1];
76 iv[iv_n - x - 1] = iv[iv_n - y - 1];
77 iv[iv_n - y - 1] = v;
80 static void iv_dup(void)
82 iv[iv_n] = iv[iv_n - 1];
83 iv_n++;
86 void o_num(long n)
88 ic_put(O_MOV | O_NUM, n, 0, 0);
91 void o_local(long id)
93 ic_put(O_MOV | O_LOC, id, 0, 0);
96 void o_sym(char *sym)
98 ic_put(O_MOV | O_SYM, out_sym(sym), 0, 0);
101 void o_tmpdrop(int n)
103 iv_drop(n >= 0 ? n : iv_n);
106 void o_tmpswap(void)
108 iv_swap(0, 1);
111 void o_tmpcopy(void)
113 iv_dup();
116 /* return one if the given value is constant */
117 static int ic_const(long iv)
119 long oc = O_C(ic[iv].op);
120 return oc & O_MOV && oc & (O_NUM | O_SYM | O_LOC);
123 /* return one if the given value is a simple load */
124 static int ic_load(long iv)
126 long oc = O_C(ic[iv].op);
127 int i;
128 if (oc & O_LD && oc & (O_NUM | O_SYM | O_LOC)) {
129 for (i = iv + 1; i < ic_n; i++)
130 if (ic[i].op & O_ST)
131 return 0;
132 return 1;
134 return 0;
137 void o_bop(long op)
139 int r1 = iv_pop();
140 int r2 = iv_pop();
141 /* load constants as late as possible */
142 if (opt(1) && ic_const(r2) && !ic_const(r1)) {
143 ic_put(ic[r2].op, ic[r2].a1, ic[r2].a2, ic[r2].a3);
144 r2 = iv_pop();
146 ic_put(op, r2, r1, 0);
147 if (opt(1))
148 io_num() && io_mul2() && io_addr() && io_imm();
151 void o_uop(long op)
153 int r1 = iv_pop();
154 ic_put(op, r1, 0, 0);
155 if (opt(1))
156 io_num() && io_cmp();
159 void o_assign(long bt)
161 int rv = iv_pop();
162 int lv = iv_pop();
163 /* load constants as late as possible */
164 if (opt(1) && (ic_const(lv) || ic_load(lv))) {
165 ic_put(ic[lv].op, ic[lv].a1, ic[lv].a2, ic[lv].a3);
166 lv = iv_pop();
168 ic_put(O_MK(O_ST | O_NUM, bt), rv, lv, 0);
169 iv_put(rv);
170 if (opt(1))
171 io_loc();
174 void o_deref(long bt)
176 int r1 = iv_pop();
177 ic_put(O_MK(O_LD | O_NUM, bt), r1, 0, 0);
178 if (opt(1))
179 io_loc();
182 void o_cast(long bt)
184 if (T_SZ(bt) != ULNG) {
185 int r1 = iv_pop();
186 ic_put(O_MK(O_MOV, bt), r1, 0, 0);
187 if (opt(1))
188 io_num();
192 void o_memcpy(void)
194 int r2 = iv_pop();
195 int r1 = iv_pop();
196 int r0 = iv_pop();
197 ic_put(O_MCPY, r0, r1, r2);
200 void o_memset(void)
202 int r2 = iv_pop();
203 int r1 = iv_pop();
204 int r0 = iv_pop();
205 ic_put(O_MSET, r0, r1, r2);
208 void o_call(int argc, int ret)
210 struct ic *c;
211 long *args = malloc(argc * sizeof(c->args[0]));
212 int r1, i;
213 for (i = argc - 1; i >= 0; --i)
214 args[i] = iv_pop();
215 for (i = argc - 1; i >= 0; --i) {
216 int iv = args[i];
217 /* load constants as late as possible */
218 if (opt(1) && (ic_const(iv) || ic_load(iv))) {
219 ic_put(ic[iv].op, ic[iv].a1, ic[iv].a2, ic[iv].a3);
220 args[i] = iv_pop();
223 r1 = iv_pop();
224 c = ic_put(O_CALL, r1, 0, argc);
225 c->args = args;
226 iv_drop(ret == 0);
227 if (opt(1))
228 io_call();
231 void o_ret(int ret)
233 if (!ret)
234 o_num(0);
235 ic_put(O_RET, iv_pop(), 0, 0);
238 void o_label(long id)
240 while (id >= lab_sz) {
241 lab_sz = MAX(128, lab_sz * 2);
242 lab_loc = mextend(lab_loc, lab_n, lab_sz, sizeof(*lab_loc));
244 while (lab_n <= id)
245 lab_loc[lab_n++] = -1;
246 lab_loc[id] = ic_n;
247 lab_last = ic_n;
250 void o_jmp(long id)
252 ic_put(O_JMP, 0, 0, id);
255 void o_jz(long id)
257 ic_put(O_JZ, iv_pop(), 0, id);
258 if (opt(1))
259 io_jmp();
262 int o_popnum(long *n)
264 if (ic_num(ic, iv_get(0), n))
265 return 1;
266 iv_drop(1);
267 return 0;
270 int o_popsym(long *sym, long *off)
272 if (ic_sym(ic, iv_get(0), sym, off))
273 return 1;
274 iv_drop(1);
275 return 0;
278 long o_mark(void)
280 return ic_n;
283 void o_back(long mark)
285 ic_back(mark);
288 void ic_get(struct ic **c, long *n)
290 int i;
291 if (!ic_n || ~ic[ic_n - 1].op & O_RET || lab_last == ic_n)
292 o_ret(0);
293 for (i = 0; i < ic_n; i++) /* filling branch targets */
294 if (ic[i].op & O_JXX)
295 ic[i].a3 = lab_loc[ic[i].a3];
296 io_deadcode(); /* removing dead code */
297 *c = ic;
298 *n = ic_n;
299 ic = NULL;
300 ic_n = 0;
301 ic_sz = 0;
302 iv_n = 0;
303 free(lab_loc);
304 lab_loc = NULL;
305 lab_n = 0;
306 lab_sz = 0;
307 lab_last = 0;
310 void ic_free(struct ic *ic)
312 if (ic->op & O_CALL)
313 free(ic->args);
316 /* intermediate code queries */
318 static long cb(long op, long a, long b)
320 switch (O_C(op)) {
321 case O_ADD:
322 return a + b;
323 case O_SUB:
324 return a - b;
325 case O_AND:
326 return a & b;
327 case O_OR:
328 return a | b;
329 case O_XOR:
330 return a ^ b;
331 case O_MUL:
332 return a * b;
333 case O_DIV:
334 return a / b;
335 case O_MOD:
336 return a % b;
337 case O_SHL:
338 return a << b;
339 case O_SHR:
340 return O_T(op) & T_MSIGN ? a >> b : (unsigned long) a >> b;
341 case O_LT:
342 return a < b;
343 case O_GT:
344 return a > b;
345 case O_LE:
346 return a <= b;
347 case O_GE:
348 return a >= b;
349 case O_EQ:
350 return a == b;
351 case O_NE:
352 return a != b;
354 return 0;
357 static long cu(int op, long i)
359 switch (O_C(op)) {
360 case O_NEG:
361 return -i;
362 case O_NOT:
363 return ~i;
364 case O_LNOT:
365 return !i;
367 return 0;
370 static long c_cast(long n, unsigned bt)
372 if (!(bt & T_MSIGN) && T_SZ(bt) != ULNG)
373 n &= ((1l << (long) (T_SZ(bt) * 8)) - 1);
374 if (bt & T_MSIGN && T_SZ(bt) != ULNG &&
375 n > (1l << (T_SZ(bt) * 8 - 1)))
376 n = -((1l << (T_SZ(bt) * 8)) - n);
377 return n;
380 int ic_num(struct ic *ic, long iv, long *n)
382 long n1, n2;
383 long oc = O_C(ic[iv].op);
384 long bt = O_T(ic[iv].op);
385 if (oc & O_MOV && oc & O_NUM) {
386 *n = ic[iv].a1;
387 return 0;
389 if (oc & O_BOP) {
390 if (ic_num(ic, ic[iv].a1, &n1))
391 return 1;
392 if (ic_num(ic, ic[iv].a2, &n2))
393 return 1;
394 *n = cb(ic[iv].op, n1, n2);
395 return 0;
397 if (oc & O_UOP) {
398 if (ic_num(ic, ic[iv].a1, &n1))
399 return 1;
400 *n = cu(ic[iv].op, n1);
401 return 0;
403 if (oc & O_MOV && !(oc & (O_NUM | O_LOC | O_SYM))) {
404 if (ic_num(ic, ic[iv].a1, &n1))
405 return 1;
406 *n = c_cast(n1, bt);
407 return 0;
409 return 1;
412 int ic_sym(struct ic *ic, long iv, long *sym, long *off)
414 long n;
415 long oc = O_C(ic[iv].op);
416 if (oc & O_MOV && oc & O_SYM) {
417 *sym = ic[iv].a1;
418 *off = ic[iv].a2;
419 return 0;
421 if (oc == O_ADD) {
422 if ((ic_sym(ic, ic[iv].a1, sym, off) ||
423 ic_num(ic, ic[iv].a2, &n)) &&
424 (ic_sym(ic, ic[iv].a2, sym, off) ||
425 ic_num(ic, ic[iv].a1, &n)))
426 return 1;
427 *off += n;
428 return 0;
430 if (oc == O_SUB) {
431 if (ic_sym(ic, ic[iv].a1, sym, off) ||
432 ic_num(ic, ic[iv].a2, &n))
433 return 1;
434 *off -= n;
435 return 0;
437 return 1;
440 static int ic_off(struct ic *ic, long iv, long *base_iv, long *off)
442 long n;
443 long oc = O_C(ic[iv].op);
444 if (oc == (O_ADD | O_NUM)) {
445 *base_iv = ic[iv].a1;
446 *off = ic[iv].a2;
447 return 0;
449 if (oc == (O_SUB | O_NUM)) {
450 *base_iv = ic[iv].a1;
451 *off = -ic[iv].a2;
452 return 0;
454 if (oc == O_ADD) {
455 if ((ic_off(ic, ic[iv].a1, base_iv, off) ||
456 ic_num(ic, ic[iv].a2, &n)) &&
457 (ic_off(ic, ic[iv].a2, base_iv, off) ||
458 ic_num(ic, ic[iv].a1, &n)))
459 return 1;
460 *off += n;
461 return 0;
463 if (oc == O_SUB) {
464 if (ic_off(ic, ic[iv].a1, base_iv, off) ||
465 ic_num(ic, ic[iv].a2, &n))
466 return 1;
467 *off -= n;
468 return 0;
470 *base_iv = iv;
471 *off = 0;
472 return 0;
475 /* number of register arguments */
476 int ic_regcnt(struct ic *ic)
478 long o = ic->op;
479 if (o & O_BOP)
480 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
481 if (o & O_UOP)
482 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
483 if (o & O_CALL)
484 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
485 if (o & O_MOV)
486 return o & (O_NUM | O_SYM | O_LOC) ? 0 : 1;
487 if (o & O_MEM)
488 return 3;
489 if (o & O_JMP)
490 return 0;
491 if (o & O_JZ)
492 return 1;
493 if (o & O_JCC)
494 return o & (O_NUM | O_SYM | O_LOC) ? 1 : 2;
495 if (o & O_RET)
496 return 1;
497 if (o & O_LD)
498 return ((o & O_NUM) != 0);
499 if (o & O_ST)
500 return 1 + ((o & O_NUM) != 0);
501 return 0;
505 * The returned array indicates the last instruction in
506 * which the value produced by each instruction is used.
508 long *ic_lastuse(struct ic *ic, long ic_n)
510 long *luse = calloc(ic_n, sizeof(luse[0]));
511 int i, j;
512 for (i = ic_n - 1; i >= 0; --i) {
513 int n = ic_regcnt(ic + i);
514 if (!luse[i])
515 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
516 luse[i] = -1;
517 if (!luse[i])
518 continue;
519 if (n >= 1 && !luse[ic[i].a1])
520 luse[ic[i].a1] = i;
521 if (n >= 2 && !luse[ic[i].a2])
522 luse[ic[i].a2] = i;
523 if (n >= 3 && !luse[ic[i].a3])
524 luse[ic[i].a3] = i;
525 if (ic[i].op & O_CALL)
526 for (j = 0; j < ic[i].a3; j++)
527 if (!luse[ic[i].args[j]])
528 luse[ic[i].args[j]] = i;
530 return luse;
533 /* intermediate code optimisations */
535 /* constant folding */
536 static int io_num(void)
538 long n;
539 if (!ic_num(ic, iv_get(0), &n)) {
540 iv_drop(1);
541 o_num(n);
542 return 0;
544 return 1;
547 static int log2a(unsigned long n)
549 int i = 0;
550 for (i = 0; i < LONGSZ * 8; i++)
551 if (n & (1u << i))
552 break;
553 if (i == LONGSZ * 8 || !(n >> (i + 1)))
554 return i;
555 return -1;
558 static long iv_num(long n)
560 o_num(n);
561 return iv_pop();
564 /* optimised multiplication operations for powers of two */
565 static int io_mul2(void)
567 long iv = iv_get(0);
568 long n, p;
569 long r1, r2;
570 long oc = O_C(ic[iv].op);
571 long bt = O_T(ic[iv].op);
572 if (!(oc & O_MUL))
573 return 1;
574 if (oc == O_MUL && !ic_num(ic, ic[iv].a1, &n)) {
575 long t = ic[iv].a1;
576 ic[iv].a1 = ic[iv].a2;
577 ic[iv].a2 = t;
579 if (ic_num(ic, ic[iv].a2, &n))
580 return 1;
581 p = log2a(n);
582 if (n && p < 0)
583 return 1;
584 if (oc == O_MUL) {
585 iv_drop(1);
586 if (n == 1) {
587 iv_put(ic[iv].a1);
588 return 0;
590 if (n == 0) {
591 o_num(0);
592 return 0;
594 r2 = iv_num(p);
595 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
596 return 0;
598 if (oc == O_DIV && ~bt & T_MSIGN) {
599 iv_drop(1);
600 if (n == 1) {
601 iv_put(ic[iv].a1);
602 return 0;
604 r2 = iv_num(p);
605 ic_put(O_MK(O_SHR, ULNG), ic[iv].a1, r2, 0);
606 return 0;
608 if (oc == O_MOD && ~bt & T_MSIGN) {
609 iv_drop(1);
610 if (n == 1) {
611 o_num(0);
612 return 0;
614 r2 = iv_num(LONGSZ * 8 - p);
615 ic_put(O_MK(O_SHL, ULNG), ic[iv].a1, r2, 0);
616 r1 = iv_pop();
617 r2 = iv_num(LONGSZ * 8 - p);
618 ic_put(O_MK(O_SHR, ULNG), r1, r2, 0);
619 return 0;
621 return 1;
624 /* optimise comparison */
625 static int io_cmp(void)
627 long iv = iv_get(0);
628 long cmp = ic[iv].a1;
629 if (O_C(ic[iv].op) == O_LNOT && ic[cmp].op & O_CMP) {
630 iv_drop(1);
631 ic[cmp].op ^= 1;
632 iv_put(cmp);
633 return 0;
635 return 1;
638 /* optimise branch instructions after comparison */
639 static int io_jmp(void)
641 struct ic *c = &ic[ic_n - 1];
642 long oc = O_C(c->op);
643 if (oc & O_JZ && O_C(ic[c->a1].op) == O_LNOT) {
644 c->a1 = ic[c->a1].a1;
645 c->op ^= 1;
646 return 0;
648 if (oc & O_JZ && O_C(ic[c->a1].op) & O_CMP) {
649 long cop = (ic[c->a1].op & ~O_CMP) | O_JCC;
650 c->op = O_C(c->op) == O_JZ ? cop ^ 1 : cop;
651 c->a2 = ic[c->a1].a2;
652 c->a1 = ic[c->a1].a1;
653 return 0;
655 return 1;
658 /* optimise accessing locals or symbols with an offset */
659 static int io_addr(void)
661 long iv, off;
662 if (ic_off(ic, iv_get(0), &iv, &off) || iv == iv_get(0))
663 return 1;
664 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
665 iv_drop(1);
666 ic_put(O_MOV | O_LOC, ic[iv].a1, ic[iv].a2 + off, 0);
667 return 0;
669 if (ic[iv].op & O_MOV && ic[iv].op & O_SYM) {
670 iv_drop(1);
671 ic_put(O_MOV | O_SYM, ic[iv].a1, ic[iv].a2 + off, 0);
672 return 0;
674 return 1;
677 static int imm_ok(long op, long n, int arg)
679 long m[5];
680 if (i_reg(op | O_NUM, m + 0, m + 1, m + 2, m + 3, m + 4))
681 return 0;
682 return i_imm(m[arg], n);
685 /* optimise loading and storing locals */
686 static int io_loc(void)
688 struct ic *c = &ic[ic_n - 1];
689 long iv, off;
690 if (c->op & O_LD && c->op & O_NUM) {
691 if (ic_off(ic, c->a1, &iv, &off))
692 return 1;
693 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
694 c->op = (c->op & ~O_NUM) | O_LOC;
695 c->a1 = ic[iv].a1;
696 c->a2 += ic[iv].a2 + off;
697 return 0;
699 if (imm_ok(c->op, off, 2)) {
700 c->a1 = iv;
701 c->a2 += off;
703 return 0;
705 if (c->op & O_ST && c->op & O_NUM) {
706 if (ic_off(ic, c->a2, &iv, &off))
707 return 1;
708 if (ic[iv].op & O_MOV && ic[iv].op & O_LOC) {
709 c->op = (c->op & ~O_NUM) | O_LOC;
710 c->a2 = ic[iv].a1;
711 c->a3 += ic[iv].a2 + off;
712 return 0;
714 if (imm_ok(c->op, off, 3)) {
715 c->a2 = iv;
716 c->a3 += off;
718 return 0;
720 return 1;
723 /* use instruction immediates */
724 static int io_imm(void)
726 struct ic *c = &ic[ic_n - 1];
727 long oc = O_C(c->op);
728 long n;
729 if (oc & (O_NUM | O_LOC | O_SYM))
730 return 1;
731 if (oc == O_ADD || oc == O_MUL || oc == O_AND || oc == O_OR ||
732 oc == O_XOR || oc == O_EQ || oc == O_NE) {
733 if (!ic_num(ic, c->a1, &n)) {
734 long t = c->a1;
735 c->a1 = c->a2;
736 c->a2 = t;
739 if (oc == O_LT || oc == O_GE || oc == O_LE || oc == O_GT) {
740 if (!ic_num(ic, c->a1, &n)) {
741 int t = c->a1;
742 c->a1 = c->a2;
743 c->a2 = t;
744 c->op ^= 1;
747 if (oc & O_JCC && !ic_num(ic, c->a1, &n)) {
748 int t = c->a1;
749 c->a1 = c->a2;
750 c->a2 = t;
751 c->op ^= 1;
753 if (oc & O_JCC && !ic_num(ic, c->a2, &n) && imm_ok(c->op, n, 1)) {
754 c->op |= O_NUM;
755 c->a2 = n;
756 return 0;
758 if (!(oc & O_BOP) || ic_num(ic, c->a2, &n))
759 return 1;
760 if ((oc == O_ADD || oc == O_SUB || oc & O_SHL) && n == 0) {
761 iv_drop(1);
762 iv_put(c->a1);
763 return 0;
765 if (imm_ok(c->op, n, 2)) {
766 c->op |= O_NUM;
767 c->a2 = n;
768 return 0;
770 return 1;
773 /* calling symbols */
774 static int io_call(void)
776 struct ic *c = &ic[ic_n - 1];
777 long sym, off;
778 if (c->op & O_CALL && !ic_sym(ic, c->a1, &sym, &off) && !off) {
779 c->op |= O_SYM;
780 c->a1 = sym;
781 return 0;
783 return 1;
786 /* remove dead code */
787 static void io_deadcode(void)
789 char *live;
790 long *nidx;
791 long src = 0, dst = 0;
792 int i, j;
793 /* liveness analysis */
794 live = calloc(ic_n, sizeof(live[0]));
795 for (i = ic_n - 1; i >= 0; i--) {
796 int n = ic_regcnt(ic + i);
797 if (!(ic[i].op & O_OUT) || ic[i].op & O_CALL)
798 live[i] = 1;
799 if (!live[i])
800 continue;
801 if (n >= 1)
802 live[ic[i].a1] = 1;
803 if (n >= 2)
804 live[ic[i].a2] = 1;
805 if (n >= 3)
806 live[ic[i].a3] = 1;
807 if (ic[i].op & O_CALL)
808 for (j = 0; j < ic[i].a3; j++)
809 live[ic[i].args[j]] = 1;
811 /* the new indices of intermediate instructions */
812 nidx = calloc(ic_n, sizeof(nidx[0]));
813 while (src < ic_n) {
814 while (src < ic_n && !live[src]) {
815 nidx[src] = dst;
816 ic_free(&ic[src++]);
818 if (src < ic_n) {
819 nidx[src] = dst;
820 if (src != dst)
821 memcpy(ic + dst, ic + src, sizeof(ic[src]));
822 src++;
823 dst++;
826 ic_n = dst;
827 /* adjusting arguments and branch targets */
828 for (i = 0; i < ic_n; i++) {
829 int n = ic_regcnt(ic + i);
830 if (n >= 1)
831 ic[i].a1 = nidx[ic[i].a1];
832 if (n >= 2)
833 ic[i].a2 = nidx[ic[i].a2];
834 if (n >= 3)
835 ic[i].a3 = nidx[ic[i].a3];
836 if (ic[i].op & O_JXX)
837 ic[i].a3 = nidx[ic[i].a3];
838 if (ic[i].op & O_CALL)
839 for (j = 0; j < ic[i].a3; j++)
840 ic[i].args[j] = nidx[ic[i].args[j]];
842 free(live);
843 free(nidx);