gen: back-port cmp+bcc optimization again
[neatcc/cc.git] / gen.c
blob537bc64755d2c1ac13f1e6b352ceb30fd0927b8a
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include "gen.h"
5 #include "out.h"
6 #include "tok.h"
8 #define LOC_REG 0x01
9 #define LOC_MEM 0x02
10 #define LOC_NUM 0x04
11 #define LOC_SYM 0x08
12 #define LOC_LOCAL 0x10
14 #define NREGS 16
16 #define REG_PC 15 /* program counter */
17 #define REG_LR 14 /* link register */
18 #define REG_SP 13 /* stack pointer */
19 #define REG_TMP 12 /* temporary register */
20 #define REG_FP 11 /* frame pointer register */
21 #define REG_DP 10 /* data pointer register */
22 #define REG_RET 0 /* returned value register */
23 #define REG_FORK 0 /* result of conditional branches */
25 #define MIN(a, b) ((a) < (b) ? (a) : (b))
26 #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
28 static char cs[SECSIZE]; /* code segment */
29 static int cslen;
30 static char ds[SECSIZE]; /* data segment */
31 static int dslen;
32 static long bsslen; /* bss segment size */
34 static int nogen; /* don't generate code */
35 static long sp;
36 static long func_beg;
37 static long maxsp;
39 #define TMP(i) (((i) < ntmp) ? &tmps[ntmp - 1 - (i)] : NULL)
41 static struct tmp {
42 long addr;
43 char sym[NAMELEN];
44 long off; /* offset from a symbol or a local */
45 unsigned loc; /* variable location */
46 unsigned bt; /* type of address; zero when not a pointer */
47 } tmps[MAXTMP];
48 static int ntmp;
50 static int tmpsp;
52 /* arch-specific functions */
53 static void i_ldr(int l, int rd, int rn, int off, int bt);
54 static void i_mov(int rd, int rn);
55 static void i_add(int op, int rd, int rn, int rm);
56 static void i_shl(int op, int rd, int rm, int rs);
57 static void i_mul(int rd, int rn, int rm);
58 static void i_cmp(int rn, int rm);
59 static int i_decodeable(long imm);
60 static void i_add_imm(int op, int rd, int rn, long n);
61 static void i_shl_imm(int op, int rd, int rn, long n);
62 static void i_cmp_imm(int rn, long n);
63 static void i_add_anyimm(int rd, int rn, long n);
64 static void i_num(int rd, long n);
65 static void i_sym(int rd, char *sym, int off);
66 static void i_set(int op, int rd);
67 static void i_neg(int rd);
68 static void i_not(int rd);
69 static void i_lnot(int rd);
70 static void i_zx(int rd, int bits);
71 static void i_sx(int rd, int bits);
72 static void i_b(long addr);
73 static void i_b_fill(long *dst, int diff);
74 static void i_b_if(long addr, int rn, int z);
75 static void i_memcpy(int rd, int rs, int rn);
76 static void i_memset(int rd, int rs, int rn);
77 static void i_call(char *sym, int off);
78 static void i_call_reg(int rd);
79 static void i_prolog(void);
80 static void i_epilog(void);
82 static struct tmp *regs[NREGS];
83 static int tmpregs[] = {4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
84 static int argregs[] = {0, 1, 2, 3};
86 #define MAXRET (1 << 8)
88 static long ret[MAXRET];
89 static int nret;
91 /* output div/mod functions */
92 static int putdiv = 0;
94 static void os(void *s, int n)
96 memcpy(cs + cslen, s, n);
97 cslen += n;
100 static void oi(long n)
102 if (nogen)
103 return;
104 *(int *) (cs + cslen) = n;
105 cslen += 4;
108 static long sp_push(int size)
110 sp += size;
111 if (sp > maxsp)
112 maxsp = sp;
113 return sp;
116 static void tmp_mem(struct tmp *tmp)
118 int src = tmp->addr;
119 if (!(tmp->loc == LOC_REG))
120 return;
121 if (tmpsp == -1)
122 tmpsp = sp;
123 tmp->addr = -sp_push(LONGSZ);
124 i_ldr(0, src, REG_FP, tmp->addr, LONGSZ);
125 regs[src] = NULL;
126 tmp->loc = LOC_MEM;
129 static void num_cast(struct tmp *t, unsigned bt)
131 if (!(bt & BT_SIGNED) && BT_SZ(bt) != LONGSZ)
132 t->addr &= ((1l << (long) (BT_SZ(bt) * 8)) - 1);
133 if (bt & BT_SIGNED && BT_SZ(bt) != LONGSZ &&
134 t->addr > (1l << (BT_SZ(bt) * 8 - 1)))
135 t->addr = -((1l << (BT_SZ(bt) * 8)) - t->addr);
138 static void tmp_reg(struct tmp *tmp, int dst, int deref)
140 int bt = tmp->bt;
141 if (!tmp->bt)
142 deref = 0;
143 if (deref)
144 tmp->bt = 0;
145 if (tmp->loc == LOC_NUM) {
146 i_num(dst, tmp->addr);
147 tmp->addr = dst;
148 regs[dst] = tmp;
149 tmp->loc = LOC_REG;
151 if (tmp->loc == LOC_SYM) {
152 i_sym(dst, tmp->sym, tmp->off);
153 tmp->addr = dst;
154 regs[dst] = tmp;
155 tmp->loc = LOC_REG;
157 if (tmp->loc == LOC_REG) {
158 if (deref)
159 i_ldr(1, dst, tmp->addr, 0, bt);
160 else if (dst != tmp->addr)
161 i_mov(dst, tmp->addr);
162 regs[tmp->addr] = NULL;
164 if (tmp->loc == LOC_LOCAL) {
165 if (deref)
166 i_ldr(1, dst, REG_FP, tmp->addr + tmp->off, bt);
167 else
168 i_add_anyimm(dst, REG_FP, tmp->addr + tmp->off);
170 if (tmp->loc == LOC_MEM) {
171 i_ldr(1, dst, REG_FP, tmp->addr, LONGSZ);
172 if (deref)
173 i_ldr(1, dst, dst, 0, bt);
175 tmp->addr = dst;
176 regs[dst] = tmp;
177 tmp->loc = LOC_REG;
180 static void reg_free(int reg)
182 int i;
183 if (!regs[reg])
184 return;
185 for (i = 0; i < ARRAY_SIZE(tmpregs); i++)
186 if (!regs[tmpregs[i]]) {
187 tmp_reg(regs[reg], tmpregs[i], 0);
188 return;
190 tmp_mem(regs[reg]);
193 static void reg_for(int reg, struct tmp *t)
195 if (regs[reg] && regs[reg] != t)
196 reg_free(reg);
/* move t into reg without dereferencing it */
static void tmp_mv(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 0);
}
/* load the value of t into reg, dereferencing it if it carries a type */
static void tmp_to(struct tmp *t, int reg)
{
	reg_for(reg, t);
	tmp_reg(t, reg, 1);
}
211 static void tmp_drop(int n)
213 int i;
214 for (i = ntmp - n; i < ntmp; i++)
215 if (tmps[i].loc == LOC_REG)
216 regs[tmps[i].addr] = NULL;
217 ntmp -= n;
/* load the topmost tmp into reg and remove it from the tmp stack */
static void tmp_pop(int reg)
{
	tmp_to(TMP(0), reg);
	tmp_drop(1);
}
227 static struct tmp *tmp_new(void)
229 return &tmps[ntmp++];
232 static void tmp_push(int reg)
234 struct tmp *t = tmp_new();
235 t->addr = reg;
236 t->bt = 0;
237 t->loc = LOC_REG;
238 regs[reg] = t;
241 void o_local(long addr)
243 struct tmp *t = tmp_new();
244 t->addr = -addr;
245 t->loc = LOC_LOCAL;
246 t->bt = 0;
247 t->off = 0;
250 void o_num(long num)
252 struct tmp *t = tmp_new();
253 t->addr = num;
254 t->bt = 0;
255 t->loc = LOC_NUM;
258 void o_sym(char *name)
260 struct tmp *t = tmp_new();
261 strcpy(t->sym, name);
262 t->loc = LOC_SYM;
263 t->bt = 0;
264 t->off = 0;
267 void o_tmpdrop(int n)
269 if (n == -1 || n > ntmp)
270 n = ntmp;
271 tmp_drop(n);
272 if (!ntmp) {
273 if (tmpsp != -1)
274 sp = tmpsp;
275 tmpsp = -1;
279 /* make sure tmps remain intact after a conditional expression */
280 void o_fork(void)
282 int i;
283 for (i = 0; i < ntmp - 1; i++)
284 tmp_mem(&tmps[i]);
287 void o_forkpush(void)
289 tmp_pop(REG_FORK);
292 void o_forkjoin(void)
294 tmp_push(REG_FORK);
297 void o_tmpswap(void)
299 struct tmp *t1 = TMP(0);
300 struct tmp *t2 = TMP(1);
301 struct tmp t;
302 memcpy(&t, t1, sizeof(t));
303 memcpy(t1, t2, sizeof(t));
304 memcpy(t2, &t, sizeof(t));
305 if (t1->loc == LOC_REG)
306 regs[t1->addr] = t1;
307 if (t2->loc == LOC_REG)
308 regs[t2->addr] = t2;
311 static int reg_get(int mask)
313 int i;
314 for (i = 0; i < ARRAY_SIZE(tmpregs); i++)
315 if ((1 << tmpregs[i]) & mask && !regs[tmpregs[i]])
316 return tmpregs[i];
317 for (i = 0; i < ARRAY_SIZE(tmpregs); i++)
318 if ((1 << tmpregs[i]) & mask) {
319 reg_free(tmpregs[i]);
320 return tmpregs[i];
322 return 0;
325 static int reg_fortmp(struct tmp *t, int notmask)
327 if (t->loc == LOC_REG && !(notmask & (1 << t->addr)))
328 return t->addr;
329 return reg_get(~notmask);
332 static void tmp_copy(struct tmp *t1)
334 struct tmp *t2 = tmp_new();
335 memcpy(t2, t1, sizeof(*t1));
336 if (!(t1->loc & (LOC_REG | LOC_MEM)))
337 return;
338 if (t1->loc == LOC_MEM) {
339 tmp_mv(t2, reg_get(~0));
340 } else if (t1->loc == LOC_REG) {
341 t2->addr = reg_fortmp(t2, 1 << t1->addr);
342 i_mov(t2->addr, t1->addr);
343 regs[t2->addr] = t2;
/* duplicate the topmost tmp */
void o_tmpcopy(void)
{
	struct tmp *top = TMP(0);

	tmp_copy(top);
}
352 void o_cast(unsigned bt)
354 struct tmp *t = TMP(0);
355 if (!t->bt && t->loc == LOC_NUM) {
356 num_cast(t, bt);
357 return;
359 if (BT_SZ(bt) != LONGSZ) {
360 int reg = reg_fortmp(t, 0);
361 tmp_to(t, reg);
362 if (bt & BT_SIGNED)
363 i_sx(reg, BT_SZ(bt) * 8);
364 else
365 i_zx(reg, BT_SZ(bt) * 8);
369 void o_func_beg(char *name, int argc, int global, int vararg)
371 out_sym(name, (global ? OUT_GLOB : 0) | OUT_CS, cslen, 0);
372 i_prolog();
373 sp = 0;
374 maxsp = sp;
375 ntmp = 0;
376 tmpsp = -1;
377 nret = 0;
378 memset(regs, 0, sizeof(regs));
381 void o_deref(unsigned bt)
383 struct tmp *t = TMP(0);
384 if (t->bt)
385 tmp_to(t, reg_fortmp(t, 0));
386 t->bt = bt;
/* force the value of the topmost tmp into a register */
void o_load(void)
{
	struct tmp *top = TMP(0);
	int reg = reg_fortmp(top, 0);

	tmp_to(top, reg);
}
395 #define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt)
396 #define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt)
397 #define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt)
399 int o_popnum(long *c)
401 struct tmp *t = TMP(0);
402 if (!TMP_NUM(t))
403 return 1;
404 *c = t->addr;
405 tmp_drop(1);
406 return 0;
409 void o_ret(int rets)
411 if (rets)
412 tmp_pop(REG_RET);
413 else
414 i_num(REG_RET, 0);
415 ret[nret++] = o_jmp(0);
418 void o_func_end(void)
420 int i;
421 for (i = 0; i < nret; i++)
422 o_filljmp(ret[i]);
423 i_epilog();
426 long o_mklocal(int size)
428 return sp_push(ALIGN(size, LONGSZ));
431 void o_rmlocal(long addr, int sz)
433 sp = addr - sz;
/*
 * Return the local address of the i-th argument: -(10 + i) words of
 * four bytes.  The result is computed by multiplication because
 * left-shifting a negative value is undefined behaviour in C.
 */
long o_arg2loc(int i)
{
	return -(10 + i) * 4;
}
441 void o_assign(unsigned bt)
443 struct tmp *t1 = TMP(0);
444 struct tmp *t2 = TMP(1);
445 int r1 = reg_fortmp(t1, 0);
446 int r2 = reg_fortmp(t2, 1 << r1);
447 int off = 0;
448 tmp_to(t1, r1);
449 if (t2->bt)
450 tmp_to(t2, r2);
451 if (t2->loc == LOC_LOCAL) {
452 r2 = REG_FP;
453 off = t2->addr + t2->off;
454 } else {
455 tmp_to(t2, r2);
457 tmp_drop(2);
458 i_ldr(0, r1, r2, off, bt);
459 tmp_push(r1);
462 static long cu(int op, long i)
464 switch (op & 0xff) {
465 case O_NEG:
466 return -i;
467 case O_NOT:
468 return ~i;
469 case O_LNOT:
470 return !i;
472 return 0;
475 static int c_uop(int op)
477 struct tmp *t1 = TMP(0);
478 if (!TMP_NUM(t1))
479 return 1;
480 tmp_drop(1);
481 o_num(cu(op, t1->addr));
482 return 0;
485 static long cb(int op, long a, long b)
487 switch (op & 0xff) {
488 case O_ADD:
489 return a + b;
490 case O_SUB:
491 return a - b;
492 case O_AND:
493 return a & b;
494 case O_OR:
495 return a | b;
496 case O_XOR:
497 return a ^ b;
498 case O_MUL:
499 return a * b;
500 case O_DIV:
501 return a / b;
502 case O_MOD:
503 return a % b;
504 case O_SHL:
505 return a << b;
506 case O_SHR:
507 if (op & O_SIGNED)
508 return a >> b;
509 else
510 return (unsigned long) a >> b;
511 case O_LT:
512 return a < b;
513 case O_GT:
514 return a > b;
515 case O_LE:
516 return a <= b;
517 case O_GE:
518 return a >= b;
519 case O_EQ:
520 return a == b;
521 case O_NEQ:
522 return a != b;
524 return 0;
527 static int c_bop(int op)
529 struct tmp *t1 = TMP(0);
530 struct tmp *t2 = TMP(1);
531 int locals = LOCAL_PTR(t1) + LOCAL_PTR(t2);
532 int syms = SYM_PTR(t1) + SYM_PTR(t2);
533 int nums = TMP_NUM(t1) + TMP_NUM(t2);
534 if (syms + locals == 2 || syms + nums + locals != 2)
535 return 1;
536 if (nums == 1)
537 if ((op & 0xff) != O_ADD && ((op & 0xff) != O_SUB || TMP_NUM(t2)))
538 return 1;
539 if (nums == 1) {
540 long o1 = TMP_NUM(t1) ? t1->addr : t1->off;
541 long o2 = TMP_NUM(t2) ? t2->addr : t2->off;
542 long ret = cb(op, o2, o1);
543 if (!TMP_NUM(t1))
544 o_tmpswap();
545 t2->off = ret;
546 tmp_drop(1);
547 } else {
548 long ret = cb(op, t2->addr, t1->addr);
549 tmp_drop(2);
550 o_num(ret);
552 return 0;
555 void o_uop(int op)
557 int r1 = reg_fortmp(TMP(0), 0);
558 if (!c_uop(op))
559 return;
560 tmp_to(TMP(0), r1);
561 switch (op & 0xff) {
562 case O_NEG:
563 i_neg(r1);
564 break;
565 case O_NOT:
566 i_not(r1);
567 break;
568 case O_LNOT:
569 i_lnot(r1);
570 break;
/*
 * Pop both operands of a binary operation into distinct registers:
 * *r2 receives the topmost tmp and *r1 the one below it.
 */
static void bin_regs(int *r1, int *r2)
{
	struct tmp *rhs = TMP(0);
	struct tmp *lhs = TMP(1);
	int rreg = reg_fortmp(rhs, 0);

	*r2 = rreg;
	tmp_to(rhs, rreg);
	*r1 = reg_fortmp(lhs, 1 << rreg);
	tmp_pop(*r2);
	tmp_pop(*r1);
}
585 static int bop_imm(int *r1, long *n, int swap)
587 struct tmp *t1 = TMP(0);
588 struct tmp *t2 = TMP(1);
589 if (!TMP_NUM(t1) && (!swap || !TMP_NUM(t2)))
590 return 1;
591 *n = TMP_NUM(t1) ? t1->addr : t2->addr;
592 if (!i_decodeable(*n))
593 return 1;
594 if (!TMP_NUM(t1))
595 o_tmpswap();
596 *r1 = reg_fortmp(t2, 0);
597 tmp_drop(1);
598 tmp_pop(*r1);
599 return 0;
602 static void bin_add(int op)
604 int r1, r2;
605 long n;
606 if (!bop_imm(&r1, &n, (op & 0xff) != O_SUB)) {
607 i_add_imm(op, r1, r1, n);
608 } else {
609 bin_regs(&r1, &r2);
610 i_add(op, r1, r1, r2);
612 tmp_push(r1);
/* generate code for a shift, using the immediate form when possible */
static void bin_shx(int op)
{
	int r1, r2;
	long n;

	if (bop_imm(&r1, &n, 0)) {
		bin_regs(&r1, &r2);
		i_shl(op, r1, r1, r2);
	} else {
		i_shl_imm(op, r1, r1, n);
	}
	tmp_push(r1);
}
628 static int log2a(unsigned long n)
630 int i = 0;
631 for (i = 0; i < LONGSZ * 8; i++)
632 if (n & (1u << i))
633 break;
634 if (i == LONGSZ * 8 || !(n >> (i + 1)))
635 return i;
636 return -1;
639 /* optimized version of mul/div/mod for powers of two */
640 static int mul_2(int op)
642 struct tmp *t1 = TMP(0);
643 struct tmp *t2 = TMP(1);
644 long n;
645 int r2;
646 int p;
647 if ((op & 0xff) == O_MUL && t2->loc == LOC_NUM && !t2->bt)
648 o_tmpswap();
649 if (t1->loc != LOC_NUM || t1->bt)
650 return 1;
651 n = t1->addr;
652 p = log2a(n);
653 if (n && p == -1)
654 return 1;
655 if ((op & 0xff) == O_MUL) {
656 tmp_drop(1);
657 if (n == 1)
658 return 0;
659 if (n == 0) {
660 tmp_drop(1);
661 o_num(0);
662 return 0;
664 r2 = reg_fortmp(t2, 0);
665 tmp_to(t2, r2);
666 i_shl_imm(O_SHL, r2, r2, p);
667 return 0;
669 if (op == O_DIV) {
670 tmp_drop(1);
671 if (n == 1)
672 return 0;
673 r2 = reg_fortmp(t2, 0);
674 tmp_to(t2, r2);
675 i_shl_imm((op & O_SIGNED) | O_SHR, r2, r2, p);
676 return 0;
678 if (op == O_MOD) {
679 tmp_drop(1);
680 if (n == 1) {
681 tmp_drop(1);
682 o_num(0);
683 return 0;
685 r2 = reg_fortmp(t2, 0);
686 tmp_to(t2, r2);
687 i_zx(r2, p);
688 return 0;
690 return 1;
693 static void bin_div(int op)
695 struct tmp *t2 = TMP(0);
696 struct tmp *t1 = TMP(1);
697 char *func;
698 int i;
699 putdiv = 1;
700 if ((op & 0xff) == O_DIV)
701 func = op & O_SIGNED ? "__divdi3" : "__udivdi3";
702 else
703 func = op & O_SIGNED ? "__moddi3" : "__umoddi3";
704 for (i = 0; i < ARRAY_SIZE(argregs); i++)
705 if (regs[argregs[i]] && regs[argregs[i]] - tmps < ntmp - 2)
706 tmp_mem(regs[argregs[i]]);
707 tmp_to(t1, argregs[0]);
708 tmp_to(t2, argregs[1]);
709 tmp_drop(2);
710 i_call(func, 0);
711 tmp_push(REG_RET);
714 static void bin_mul(int op)
716 int r1, r2;
717 if (!mul_2(op))
718 return;
719 if ((op & 0xff) == O_DIV || (op & 0xff) == O_MOD) {
720 bin_div(op);
721 } else {
722 bin_regs(&r1, &r2);
723 i_mul(r1, r1, r2);
724 tmp_push(r1);
728 static void bin_cmp(int op)
730 int r1, r2;
731 long n;
732 if (!bop_imm(&r1, &n, (op & 0xff) == O_EQ || (op & 0xff) == O_NEQ)) {
733 i_cmp_imm(r1, n);
734 } else {
735 bin_regs(&r1, &r2);
736 i_cmp(r1, r2);
738 i_set(op, r1);
739 tmp_push(r1);
/*
 * Apply binary operator op to the two topmost tmps.  Constant operands
 * are folded; otherwise bits 4-7 of op select the operator group.
 */
void o_bop(int op)
{
	if (c_bop(op) == 0)
		return;
	switch (op & 0xf0) {
	case 0x00:
		bin_add(op);
		break;
	case 0x10:
		bin_shx(op);
		break;
	case 0x20:
		bin_mul(op);
		break;
	case 0x30:
		bin_cmp(op);
		break;
	}
}
/*
 * Load the three topmost tmps into three distinct registers: *r0 for
 * the top, *r1 for the next and *r2 for the third.  The tmps stay on
 * the stack.
 */
static void load_regs2(int *r0, int *r1, int *r2)
{
	struct tmp *top = TMP(0);
	struct tmp *mid = TMP(1);
	struct tmp *low = TMP(2);
	int used;

	*r0 = reg_fortmp(top, 0);
	used = 1 << *r0;
	*r1 = reg_fortmp(mid, used);
	used |= 1 << *r1;
	*r2 = reg_fortmp(low, used);
	tmp_to(top, *r0);
	tmp_to(mid, *r1);
	tmp_to(low, *r2);
}
/*
 * Emit an inline copy loop.  The stack holds, from the top: length,
 * source, destination.  The two topmost tmps are dropped afterwards.
 */
void o_memcpy(void)
{
	int len, src, dst;

	load_regs2(&len, &src, &dst);
	i_memcpy(dst, src, len);
	tmp_drop(2);
}
/*
 * Emit an inline fill loop.  The stack holds, from the top: length,
 * fill value, destination.  The two topmost tmps are dropped
 * afterwards.
 */
void o_memset(void)
{
	int len, val, dst;

	load_regs2(&len, &val, &dst);
	i_memset(dst, val, len);
	tmp_drop(2);
}
785 long o_mklabel(void)
787 return cslen;
790 static long jxz(long addr, int z)
792 int r = reg_fortmp(TMP(0), 0);
793 tmp_pop(r);
794 i_b_if(addr, r, z);
795 return cslen - 4;
/* pop the topmost tmp and branch to addr if it is zero */
long o_jz(long addr)
{
	long insn = jxz(addr, 1);

	return insn;
}
/* pop the topmost tmp and branch to addr if it is nonzero */
long o_jnz(long addr)
{
	long insn = jxz(addr, 0);

	return insn;
}
808 long o_jmp(long addr)
810 i_b(addr);
811 return cslen - 4;
814 void o_filljmp2(long addr, long jmpdst)
816 i_b_fill((void *) cs + addr, jmpdst - addr);
819 void o_filljmp(long addr)
821 o_filljmp2(addr, cslen);
824 void o_call(int argc, int rets)
826 struct tmp *t;
827 int i;
828 int aregs = MIN(ARRAY_SIZE(argregs), argc);
829 for (i = 0; i < ARRAY_SIZE(argregs); i++)
830 if (regs[argregs[i]] && regs[argregs[i]] - tmps < ntmp - argc)
831 tmp_mem(regs[argregs[i]]);
832 if (argc > aregs) {
833 sp_push(LONGSZ * (argc - aregs));
834 for (i = argc - 1; i >= aregs; --i) {
835 int reg = reg_fortmp(TMP(0), 0);
836 tmp_pop(reg);
837 i_ldr(0, reg, REG_SP, (i - aregs) * LONGSZ, LONGSZ);
840 for (i = aregs - 1; i >= 0; --i)
841 tmp_to(TMP(aregs - i - 1), argregs[i]);
842 tmp_drop(aregs);
843 t = TMP(0);
844 if (t->loc == LOC_SYM && !t->bt) {
845 i_call(t->sym, t->off);
846 tmp_drop(1);
847 } else {
848 int reg = t->loc == LOC_REG ? t->addr : REG_TMP;
849 tmp_pop(reg);
850 i_call_reg(reg);
852 if (rets)
853 tmp_push(REG_RET);
856 void o_nogen(void)
858 nogen++;
861 void o_dogen(void)
863 nogen--;
866 void o_mkbss(char *name, int size, int global)
868 out_sym(name, OUT_BSS | (global ? OUT_GLOB : 0), bsslen, size);
869 bsslen += ALIGN(size, LONGSZ);
872 #define MAXDATS (1 << 10)
873 static char dat_names[MAXDATS][NAMELEN];
874 static int dat_offs[MAXDATS];
875 static int ndats;
877 void err(char *msg);
878 void *o_mkdat(char *name, int size, int global)
880 void *addr = ds + dslen;
881 int idx = ndats++;
882 if (idx >= MAXDATS)
883 err("nomem: MAXDATS reached!\n");
884 strcpy(dat_names[idx], name);
885 dat_offs[idx] = dslen;
886 out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size);
887 dslen += ALIGN(size, LONGSZ);
888 return addr;
891 static int dat_off(char *name)
893 int i;
894 for (i = 0; i < ndats; i++)
895 if (!strcmp(name, dat_names[i]))
896 return dat_offs[i];
897 return 0;
900 void o_datset(char *name, int off, unsigned bt)
902 struct tmp *t = TMP(0);
903 int sym_off = dat_off(name) + off;
904 if (t->loc == LOC_NUM && !t->bt) {
905 num_cast(t, bt);
906 memcpy(ds + sym_off, &t->addr, BT_SZ(bt));
908 if (t->loc == LOC_SYM && !t->bt) {
909 out_rel(t->sym, OUT_DS, sym_off);
910 memcpy(ds + sym_off, &t->off, BT_SZ(bt));
912 tmp_drop(1);
/*
 * Compiled division functions; div.s contains the source.
 *
 * NOTE(review): the words look like position-dependent machine code
 * (several resemble relative branch-and-link instructions), so the
 * order in which o_write() emits these tables presumably must not
 * change -- confirm against div.s.
 */
static int udivdi3[] = {
	0xe3a02000, 0xe3a03000, 0xe1110001, 0x0a00000a,
	0xe1b0c211, 0xe2822001, 0x5afffffc, 0xe3a0c001,
	0xe2522001, 0x4a000004, 0xe1500211, 0x3afffffb,
	0xe0400211, 0xe083321c, 0xeafffff8, 0xe1a01000,
	0xe1a00003, 0xe1a0f00e,
};

static int umoddi3[] = {
	0xe92d4000, 0xebffffeb, 0xe1a00001, 0xe8bd8000,
};

static int divdi3[] = {
	0xe92d4030, 0xe1a04000, 0xe1a05001, 0xe1100000,
	0x42600000, 0xe1110001, 0x42611000, 0xebffffe1,
	0xe1340005, 0x42600000, 0xe1140004, 0x42611000,
	0xe8bd8030,
};

static int moddi3[] = {
	0xe92d4000, 0xebfffff0, 0xe1a00001, 0xe8bd8000,
};
936 void o_write(int fd)
938 if (putdiv) {
939 out_sym("__udivdi3", OUT_CS, cslen, 0);
940 os(udivdi3, sizeof(udivdi3));
941 out_sym("__umoddi3", OUT_CS, cslen, 0);
942 os(umoddi3, sizeof(umoddi3));
943 out_sym("__divdi3", OUT_CS, cslen, 0);
944 os(divdi3, sizeof(divdi3));
945 out_sym("__moddi3", OUT_CS, cslen, 0);
946 os(moddi3, sizeof(moddi3));
948 out_write(fd, cs, cslen, ds, dslen);
951 /* ARM arch specific functions */
953 #define I_AND 0x00
954 #define I_EOR 0x01
955 #define I_SUB 0x02
956 #define I_RSB 0x03
957 #define I_ADD 0x04
958 #define I_TST 0x08
959 #define I_CMP 0x0a
960 #define I_ORR 0x0c
961 #define I_MOV 0x0d
962 #define I_MVN 0x0f
964 /* for optimizing cmp + bcc */
965 #define OPT_ISCMP() (last_cmp + 12 == cslen && last_set + 4 == cslen)
966 #define OPT_CCOND() (*(unsigned int *) ((void *) cs + last_set) >> 28)
968 static long last_cmp = -1;
969 static long last_set = -1;
971 #define MAXNUMS 1024
973 /* data pool */
974 static long num_offs[MAXNUMS]; /* data immediate value */
975 static char num_names[MAXNUMS][NAMELEN]; /* relocation data symbol name */
976 static int nums;
978 static int pool_find(char *name, int off)
980 int i;
981 for (i = 0; i < nums; i++)
982 if (!strcmp(name, num_names[i]) && off == num_offs[i])
983 return i;
984 return -1;
987 static int pool_num(long num)
989 int idx = pool_find("", num);
990 if (idx < 0) {
991 idx = nums++;
992 num_offs[idx] = num;
993 num_names[idx][0] = '\0';
995 return idx << 2;
998 static int pool_reloc(char *name, long off)
1000 int idx = pool_find(name, off);
1001 if (idx < 0) {
1002 idx = nums++;
1003 num_offs[idx] = off;
1004 strcpy(num_names[idx], name);
1006 return idx << 2;
1009 static void pool_write(void)
1011 int i;
1012 for (i = 0; i < nums; i++) {
1013 if (num_names[i])
1014 out_rel(num_names[i], OUT_CS, cslen);
1015 oi(num_offs[i]);
1020 * data processing:
1021 * +---------------------------------------+
1022 * |COND|00|I| op |S| Rn | Rd | operand2 |
1023 * +---------------------------------------+
1025 * S: set condition code
1026 * Rn: first operand
1027 * Rd: destination operand
1029 * I=0 operand2=| shift | Rm |
1030 * I=1 operand2=|rota| imm |
1032 #define ADD(op, rd, rn, s, i, cond) \
1033 (((cond) << 28) | ((i) << 25) | ((s) << 20) | \
1034 ((op) << 21) | ((rn) << 16) | ((rd) << 12))
/*
 * Encode n as a data-processing immediate: an 8-bit value in bits 0-7
 * and a 4-bit rotation field in bits 8-11.  n is shifted right by an
 * even amount until it fits in eight bits; low bits shifted out are
 * lost, so callers compare add_decimm() of the result with n to find
 * out whether the encoding is exact.
 */
static int add_encimm(unsigned n)
{
	int pairs;

	for (pairs = 0; pairs < 12; pairs++)
		if (!(n >> (2 * (pairs + 4))))
			break;
	return (n >> (2 * pairs)) | (((16 - pairs) & 0x0f) << 8);
}
/*
 * Decode an immediate produced by add_encimm(): the low eight bits of
 * n shifted left by twice the amount implied by the rotation field in
 * bits 8-11.  The shift is done on an unsigned value because results
 * such as 0xff << 24 do not fit in a signed int.
 */
static unsigned add_decimm(int n)
{
	unsigned rot = (16 - ((n >> 8) & 0x0f)) & 0x0f;

	return ((unsigned) n & 0xffu) << (rot << 1);
}
/*
 * Step an encoded immediate up to the next value: encodings with a
 * zero rotation field are returned unchanged, all others have their
 * 8-bit value incremented; a value of 0xff carries into the rotation
 * field.
 */
static int add_rndimm(unsigned n)
{
	unsigned field = (n >> 8) & 0x0f;
	unsigned val = n & 0xff;

	if (!field)
		return n;
	if (val == 0xff)
		return 1 | (((field + 12) & 0x0f) << 8);
	return (val + 1) | (field << 8);
}
1063 static int opcode_add(int op)
1065 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
1066 static int rx[] = {I_ADD, I_SUB, I_AND, I_ORR, I_EOR};
1067 return rx[op & 0x0f];
/* emit "rd = rn <op> rm" for the operators known to opcode_add() */
static void i_add(int op, int rd, int rn, int rm)
{
	int opcode = opcode_add(op);

	oi(ADD(opcode, rd, rn, 0, 0, 14) | rm);
}
/* nonzero when imm survives an encode/decode round trip unchanged */
static int i_decodeable(long imm)
{
	int enc = add_encimm(imm);

	return add_decimm(enc) == imm;
}
/* emit "rd = rn <op> n"; n is expected to be an encodable immediate */
static void i_add_imm(int op, int rd, int rn, long n)
{
	int opcode = opcode_add(op);

	oi(ADD(opcode, rd, rn, 0, 1, 14) | add_encimm(n));
}
1085 static void i_num(int rd, long n)
1087 int enc = add_encimm(n);
1088 if (n == add_decimm(enc)) {
1089 oi(ADD(I_MOV, rd, 0, 0, 1, 14) | enc);
1090 return;
1092 enc = add_encimm(-n - 1);
1093 if (~n == add_decimm(enc)) {
1094 oi(ADD(I_MVN, rd, 0, 0, 1, 14) | enc);
1095 return;
1097 if (!nogen) {
1098 int off = pool_num(n);
1099 i_ldr(1, rd, REG_DP, off, LONGSZ);
1103 static void i_add_anyimm(int rd, int rn, long n)
1105 int neg = n < 0;
1106 int imm = add_encimm(neg ? -n : n);
1107 if (imm == add_decimm(neg ? -n : n)) {
1108 oi(ADD(neg ? I_SUB : I_ADD, rd, rn, 0, 1, 14) | imm);
1109 } else {
1110 i_num(rd, n);
1111 i_add(O_ADD, rd, rd, rn);
1116 * multiply
1117 * +----------------------------------------+
1118 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
1119 * +----------------------------------------+
1121 * Rd: destination
1122 * A: accumulate
1123 * S: set condition codes
1125 * I=0 operand2=| shift | Rm |
1126 * I=1 operand2=|rota| imm |
/*
 * Encode a multiply: rd goes to bits 16-19, rn to bits 8-11 and rm to
 * bits 0-3.  The third parameter is now the one the expansion uses;
 * it used to be named rs while the body referred to rm, which only
 * worked because the caller happened to have a variable of that name.
 */
#define MUL(rd, rn, rm)	\
	((14 << 28) | ((rd) << 16) | ((0) << 12) | ((rn) << 8) | ((9) << 4) | (rm))

/* emit a multiply of rn and rm into rd */
static void i_mul(int rd, int rn, int rm)
{
	oi(MUL(rd, rn, rm));
}
1136 static int opcode_set(int op)
1138 /* lt, gt, le, ge, eq, neq */
1139 static int ucond[] = {3, 8, 9, 2, 0, 1};
1140 static int scond[] = {11, 12, 13, 10, 0, 1};
1141 return op & O_SIGNED ? scond[op & 0x0f] : ucond[op & 0x0f];
1144 static void i_tst(int rn, int rm)
1146 oi(ADD(I_TST, 0, rn, 1, 0, 14) | rm);
1149 static void i_cmp(int rn, int rm)
1151 last_cmp = cslen;
1152 oi(ADD(I_CMP, 0, rn, 1, 0, 14) | rm);
1155 static void i_cmp_imm(int rn, long n)
1157 last_cmp = cslen;
1158 oi(ADD(I_CMP, 0, rn, 1, 1, 14) | add_encimm(n));
1161 static void i_set(int cond, int rd)
1163 oi(ADD(I_MOV, rd, 0, 0, 1, 14));
1164 last_set = cslen;
1165 oi(ADD(I_MOV, rd, 0, 0, 1, opcode_set(cond)) | 1);
1168 #define SM_LSL 0
1169 #define SM_LSR 1
1170 #define SM_ASR 2
1172 static int opcode_shl(int op)
1174 if (op & 0x0f)
1175 return op & O_SIGNED ? SM_ASR : SM_LSR;
1176 return SM_LSL;
1179 static void i_shl(int op, int rd, int rm, int rs)
1181 int sm = opcode_shl(op);
1182 oi(ADD(I_MOV, rd, 0, 0, 0, 14) | (rs << 8) | (sm << 5) | (1 << 4) | rm);
1185 static void i_shl_imm(int op, int rd, int rn, long n)
1187 int sm = opcode_shl(op);
1188 oi(ADD(I_MOV, rd, 0, 0, 0, 14) | (n << 7) | (sm << 5) | rn);
1191 static void i_mov(int rd, int rn)
1193 oi(ADD(I_MOV, rd, 0, 0, 0, 14) | rn);
1197 * single data transfer:
1198 * +------------------------------------------+
1199 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
1200 * +------------------------------------------+
1202 * I: immediate/offset
1203 * P: post/pre indexing
1204 * U: down/up
1205 * B: byte/word
1206 * W: writeback
1207 * L: store/load
1208 * Rn: base register
1209 * Rd: source/destination register
1211 * I=0 offset=| immediate |
1212 * I=1 offset=| shift | Rm |
1214 * halfword and signed data transfer
1215 * +----------------------------------------------+
1216 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
1217 * +----------------------------------------------+
1219 * +----------------------------------------------+
1220 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
1221 * +----------------------------------------------+
1223 * S: signed
1224 * H: halfword
1226 #define LDR(l, rd, rn, b, u, p, w) \
1227 ((14 << 28) | (1 << 26) | ((p) << 24) | ((b) << 22) | ((u) << 23) | \
1228 ((w) << 21) | ((l) << 20) | ((rn) << 16) | ((rd) << 12))
1229 #define LDRH(l, rd, rn, s, h, u, i) \
1230 ((14 << 28) | (1 << 24) | ((u) << 23) | ((i) << 22) | ((l) << 20) | \
1231 ((rn) << 16) | ((rd) << 12) | ((s) << 6) | ((h) << 5) | (9 << 4))
1233 static void i_ldr(int l, int rd, int rn, int off, int bt)
1235 int b = BT_SZ(bt) == 1;
1236 int h = BT_SZ(bt) == 2;
1237 int s = l && (bt & BT_SIGNED);
1238 int half = h || (b && s);
1239 int maximm = half ? 0x100 : 0x1000;
1240 int neg = off < 0;
1241 if (neg)
1242 off = -off;
1243 while (off >= maximm) {
1244 int imm = add_encimm(off);
1245 oi(ADD(neg ? I_SUB : I_ADD, REG_TMP, rn, 0, 1, 14) | imm);
1246 rn = REG_TMP;
1247 off -= add_decimm(imm);
1249 if (!half)
1250 oi(LDR(l, rd, rn, b, !neg, 1, 0) | off);
1251 else
1252 oi(LDRH(l, rd, rn, s, h, !neg, 1) |
1253 ((off & 0xf0) << 4) | (off & 0x0f));
1256 static void i_sym(int rd, char *sym, int off)
1258 if (!nogen) {
1259 int doff = pool_reloc(sym, off);
1260 i_ldr(1, rd, REG_DP, doff, LONGSZ);
1264 static void i_neg(int rd)
1266 oi(ADD(I_RSB, rd, rd, 0, 1, 14));
1269 static void i_not(int rd)
1271 oi(ADD(I_MVN, rd, 0, 0, 0, 14) | rd);
1274 static int cond_nots[] = {1, 0, 3, 2, -1, -1, -1, -1, 9, 8, 11, 10, 13, 12, -1};
1276 static void i_lnot(int rd)
1278 if (OPT_ISCMP()) {
1279 unsigned int *lset = (void *) cs + last_set;
1280 int cond = cond_nots[OPT_CCOND()];
1281 *lset = (*lset & 0x0fffffff) | (cond << 28);
1282 return;
1284 i_tst(rd, rd);
1285 i_set(O_EQ, rd);
1288 /* rd = rd & ((1 << bits) - 1) */
1289 static void i_zx(int rd, int bits)
1291 if (bits <= 8) {
1292 oi(ADD(I_AND, rd, rd, 0, 1, 14) | add_encimm((1 << bits) - 1));
1293 } else {
1294 i_shl_imm(O_SHL, rd, rd, 32 - bits);
1295 i_shl_imm(O_SHR, rd, rd, 32 - bits);
1299 static void i_sx(int rd, int bits)
1301 i_shl_imm(O_SHL, rd, rd, 32 - bits);
1302 i_shl_imm(O_SIGNED | O_SHR, rd, rd, 32 - bits);
1306 * branch:
1307 * +-----------------------------------+
1308 * |COND|101|L| offset |
1309 * +-----------------------------------+
1311 * L: link
1313 #define BL(cond, l, o) (((cond) << 28) | (5 << 25) | ((l) << 24) | \
1314 ((((o) - 8) >> 2) & 0x00ffffff))
1316 static void i_b(long addr)
1318 oi(BL(14, 0, addr - cslen));
1321 static void i_b_if(long addr, int rn, int z)
1323 if (OPT_ISCMP()) {
1324 int cond = OPT_CCOND();
1325 cslen = last_cmp + 4;
1326 last_set = -1;
1327 oi(BL(z ? cond_nots[cond] : cond, 0, addr - cslen));
1328 return;
1330 i_tst(rn, rn);
1331 oi(BL(z ? 0 : 1, 0, addr - cslen));
/*
 * Patch the branch instruction at dst so that it jumps diff bytes away
 * from itself; the top eight bits of the instruction are kept.
 *
 * Exactly one 32-bit instruction word is read and written.  Going
 * through *dst directly would touch sizeof(long) bytes, which on hosts
 * with a 64-bit long overwrites the instruction that follows.
 */
static void i_b_fill(long *dst, int diff)
{
	unsigned int insn;

	memcpy(&insn, dst, sizeof(insn));
	insn = (insn & 0xff000000) | (((diff - 8) >> 2) & 0x00ffffff);
	memcpy(dst, &insn, sizeof(insn));
}
1339 static void i_memcpy(int rd, int rs, int rn)
1341 oi(ADD(I_SUB, rn, rn, 1, 1, 14) | 1);
1342 oi(BL(4, 0, 16));
1343 oi(LDR(1, REG_TMP, rs, 1, 1, 0, 0) | 1);
1344 oi(LDR(0, REG_TMP, rd, 1, 1, 0, 0) | 1);
1345 oi(BL(14, 0, -16));
1348 static void i_memset(int rd, int rs, int rn)
1350 oi(ADD(I_SUB, rn, rn, 1, 1, 14) | 1);
1351 oi(BL(4, 0, 12));
1352 oi(LDR(0, rs, rd, 1, 1, 0, 0) | 1);
1353 oi(BL(14, 0, -12));
1356 static void i_call_reg(int rd)
1358 i_mov(REG_LR, REG_PC);
1359 i_mov(REG_PC, rd);
1362 static void i_call(char *sym, int off)
1364 if (!nogen)
1365 out_rel(sym, OUT_CS | OUT_REL24, cslen);
1366 oi(BL(14, 1, off));
1369 static void i_prolog(void)
1371 func_beg = cslen;
1372 nums = 0;
1373 oi(0xe1a0c00d); /* mov r12, sp */
1374 oi(0xe92d000f); /* stmfd sp!, {r0-r3} */
1375 oi(0xe92d5ff0); /* stmfd sp!, {r0-r11, r12, lr} */
1376 oi(0xe1a0b00d); /* mov fp, sp */
1377 oi(0xe24dd000); /* sub sp, sp, xx */
1378 oi(0xe28fa000); /* add dp, pc, xx */
1381 static void i_epilog(void)
1383 int dpoff;
1384 oi(0xe89baff0); /* ldmfd fp, {r4-r11, sp, pc} */
1385 dpoff = add_decimm(add_rndimm(add_encimm(cslen - func_beg - 28)));
1386 cslen = func_beg + dpoff + 28;
1387 maxsp = ALIGN(maxsp, 8);
1388 maxsp = add_decimm(add_rndimm(add_encimm(maxsp)));
1389 /* fill stack sub: sp = sp - xx */
1390 *(long *) (cs + func_beg + 16) |= add_encimm(maxsp);
1391 /* fill data ptr addition: dp = pc + xx */
1392 *(long *) (cs + func_beg + 20) |= add_encimm(dpoff);
1393 pool_write();