8 /* variable location */
13 #define LOC_LOCAL 0x10
15 #define MIN(a, b) ((a) < (b) ? (a) : (b))
16 #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
18 char cs
[SECSIZE
]; /* code segment */
20 static char ds
[SECSIZE
]; /* data segment */
22 static long bsslen
; /* bss segment size */
24 static long sp
; /* stack pointer offset from R_RBP */
25 static long sp_max
; /* maximum stack pointer offset */
26 static long sp_tmp
; /* sp for the first tmp on the stack */
28 #define TMP(i) (((i) < ntmp) ? &tmps[ntmp - 1 - (i)] : NULL)
33 long off
; /* offset from a symbol or a local */
34 unsigned loc
; /* variable location */
35 unsigned bt
; /* type of address; zero when not a pointer */
36 int id
; /* local variable id */
40 static struct tmp
*regs
[N_REGS
];
42 #define MAXLOCALS (1 << 10)
46 int n_addr
; /* # of address accesses */
47 int n_access
; /* # of accesses */
56 /* function statistics */
57 int pass1
; /* collect statistics; 1st pass */
58 static int stat_calls
; /* # of function calls */
59 static int stat_tmps
; /* # of stack temporaries */
60 static int stat_regs
; /* mask of used registers */
62 /* optimization info */
63 static int pass2
; /* use the collected statistics in the 1st pass */
64 static int tmp_mask
; /* registers that can be used for tmps */
65 static int opt_sargs
; /* saved args */
66 static int opt_isreg
[MAXLOCALS
];/* is a register allocated to a local? */
67 static int opt_lreg
[MAXLOCALS
]; /* registers allocated to locals */
68 static int opt_lregs
; /* mask of registers allocated to locals */
70 #define TMP_ISLREG(t) (!(t)->bt && (t)->loc == LOC_LOCAL && opt_isreg[(t)->id])
71 #define TMP_LREG(t) (opt_lreg[(t)->id])
74 #define MAXJMPS (1 << 14)
76 static long labels
[MAXJMPS
];
78 static long jmp_loc
[MAXJMPS
];
79 static int jmp_goal
[MAXJMPS
];
80 static int jmp_len
[MAXJMPS
];
90 /* the number of bytes needed for holding jmp displacement */
91 static int jmp_sz(int id
)
93 long n
= jmp_len
[id
] > 0 ? jmp_len
[id
] : -jmp_len
[id
];
97 return n
== 0 ? 0 : 1;
98 return n
< 0x7000 ? 2 : 4;
101 static void jmp_add(int id
, int rn
, int z
)
103 if (njmps
>= MAXJMPS
)
104 err("nomem: MAXJMPS reached!\n");
105 i_jmp(rn
, z
, jmp_sz(njmps
));
106 jmp_loc
[njmps
] = cslen
;
107 jmp_goal
[njmps
] = id
;
111 static void jmp_fill(void)
114 for (i
= 0; i
< njmps
; i
++)
115 jmp_len
[i
] = i_fill(jmp_loc
[i
], labels
[jmp_goal
[i
]], jmp_sz(i
));
118 /* generating code */
120 void os(void *s
, int n
)
123 cs
[cslen
++] = *(char *) (s
++);
126 void oi(long n
, int l
)
134 static long sp_push(int sz
)
136 sp
-= ALIGN(sz
, LONGSZ
);
142 static void tmp_mem(struct tmp
*tmp
)
145 if (tmp
->loc
!= LOC_REG
|| (1 << src
) & opt_lregs
)
149 tmp
->addr
= sp_push(LONGSZ
);
150 i_save(src
, REG_FP
, tmp
->addr
, LONGSZ
);
156 static void num_cast(struct tmp
*t
, unsigned bt
)
158 if (!(bt
& BT_SIGNED
) && BT_SZ(bt
) != LONGSZ
)
159 t
->addr
&= ((1l << (long) (BT_SZ(bt
) * 8)) - 1);
160 if (bt
& BT_SIGNED
&& BT_SZ(bt
) != LONGSZ
&&
161 t
->addr
> (1l << (BT_SZ(bt
) * 8 - 1)))
162 t
->addr
= -((1l << (BT_SZ(bt
) * 8)) - t
->addr
);
165 static void tmp_reg(struct tmp
*tmp
, int dst
, int deref
)
172 if (tmp
->loc
== LOC_NUM
) {
173 i_num(dst
, tmp
->addr
);
178 if (tmp
->loc
== LOC_SYM
) {
179 i_sym(dst
, tmp
->sym
, tmp
->off
);
184 if (tmp
->loc
== LOC_REG
) {
186 i_load(dst
, tmp
->addr
, 0, bt
);
187 else if (dst
!= tmp
->addr
)
188 i_mov(dst
, tmp
->addr
);
189 regs
[tmp
->addr
] = NULL
;
191 if (tmp
->loc
== LOC_LOCAL
) {
193 locals
[tmp
->id
].n_access
++;
195 locals
[tmp
->id
].n_addr
++;
197 i_load(dst
, REG_FP
, tmp
->addr
+ tmp
->off
, bt
);
199 i_op_imm(O_ADD
, dst
, REG_FP
, tmp
->addr
+ tmp
->off
);
201 if (tmp
->loc
== LOC_MEM
) {
202 i_load(dst
, REG_FP
, tmp
->addr
, LONGSZ
);
204 i_load(dst
, dst
, 0, bt
);
207 stat_regs
|= 1 << dst
;
212 /* empty the given register, but never touch the registers in rsrvd mask */
213 static void reg_free(int reg
, int rsrvd
)
219 for (i
= 0; i
< N_TMPS
; i
++)
220 if (!regs
[tmpregs
[i
]] && ~rsrvd
& (1 << tmpregs
[i
])) {
221 tmp_reg(regs
[reg
], tmpregs
[i
], 0);
227 static void reg_for(int reg
, struct tmp
*t
)
229 if (regs
[reg
] && regs
[reg
] != t
)
233 static void tmp_mv(struct tmp
*t
, int reg
)
239 static void tmp_to(struct tmp
*t
, int reg
)
242 if (t
->loc
== LOC_LOCAL
&& TMP_ISLREG(t
)) {
244 t
->addr
= TMP_LREG(t
);
250 static void tmp_drop(int n
)
253 for (i
= ntmp
- n
; i
< ntmp
; i
++)
254 if (tmps
[i
].loc
== LOC_REG
)
255 regs
[tmps
[i
].addr
] = NULL
;
259 static void tmp_pop(int reg
)
261 struct tmp
*t
= TMP(0);
266 static struct tmp
*tmp_new(void)
268 return &tmps
[ntmp
++];
271 static void tmp_push(int reg
)
273 struct tmp
*t
= tmp_new();
276 stat_regs
|= 1 << reg
;
281 void o_local(long addr
)
283 struct tmp
*t
= tmp_new();
284 t
->addr
= locals
[addr
].loc
;
293 struct tmp
*t
= tmp_new();
299 void o_sym(char *name
)
301 struct tmp
*t
= tmp_new();
302 strcpy(t
->sym
, name
);
308 void o_tmpdrop(int n
)
310 if (n
== -1 || n
> ntmp
)
320 /* make sure tmps remain intact after a conditional expression */
324 for (i
= 0; i
< ntmp
- 1; i
++)
328 void o_forkpush(void)
333 void o_forkjoin(void)
340 struct tmp
*t1
= TMP(0);
341 struct tmp
*t2
= TMP(1);
343 memcpy(&t
, t1
, sizeof(t
));
344 memcpy(t1
, t2
, sizeof(t
));
345 memcpy(t2
, &t
, sizeof(t
));
346 if (t1
->loc
== LOC_REG
)
348 if (t2
->loc
== LOC_REG
)
352 static int reg_get(int mask
)
356 for (i
= 0; i
< N_TMPS
; i
++)
357 if ((1 << tmpregs
[i
]) & mask
&& !regs
[tmpregs
[i
]]) {
358 stat_regs
|= 1 << tmpregs
[i
];
361 for (i
= 0; i
< N_TMPS
; i
++)
362 if ((1 << tmpregs
[i
]) & mask
) {
363 reg_free(tmpregs
[i
], 0);
364 stat_regs
|= 1 << tmpregs
[i
];
367 die("reg_get: out of registers!\n");
371 static int reg_tmp(struct tmp
*t
, int mask
, int readonly
)
373 if (t
->loc
== LOC_REG
&& (mask
& (1 << t
->addr
)))
374 if (!(opt_lregs
& (1 << t
->addr
)) || (readonly
&& !t
->bt
))
376 return reg_get(mask
);
379 static int reg_tmpn(struct tmp
*t
, int notmask
, int readonly
)
381 if (t
->loc
== LOC_REG
&& !(notmask
& (1 << t
->addr
)))
382 if (!(opt_lregs
& (1 << t
->addr
)) || (readonly
&& !t
->bt
))
384 return reg_get(~notmask
);
387 static void tmp_copy(struct tmp
*t1
)
389 struct tmp
*t2
= tmp_new();
390 memcpy(t2
, t1
, sizeof(*t1
));
391 if (!(t1
->loc
& (LOC_REG
| LOC_MEM
)))
393 if (t1
->loc
== LOC_MEM
) {
394 tmp_mv(t2
, reg_get(R_TMPS
));
395 } else if (t1
->loc
== LOC_REG
) {
396 t2
->addr
= reg_tmpn(t2
, 1 << t1
->addr
, 0);
397 i_mov(t2
->addr
, t1
->addr
);
399 stat_regs
|= 1 << t2
->addr
;
408 void o_deref(unsigned bt
)
410 struct tmp
*t
= TMP(0);
413 t
->addr
= TMP_LREG(t
);
416 tmp_to(t
, reg_tmp(t
, R_TMPS
, 0));
423 struct tmp
*t
= TMP(0);
424 tmp_to(t
, reg_tmp(t
, R_TMPS
, 0));
427 #define TMP_NUM(t) ((t)->loc == LOC_NUM && !(t)->bt)
428 #define LOCAL_PTR(t) ((t)->loc == LOC_LOCAL && !(t)->bt)
429 #define SYM_PTR(t) ((t)->loc == LOC_SYM && !(t)->bt)
431 int o_popnum(long *c
)
433 struct tmp
*t
= TMP(0);
450 long o_mklocal(int sz
)
452 locals
[nlocals
].loc
= sp_push(ALIGN(sz
, LONGSZ
));
453 locals
[nlocals
].sz
= sz
;
457 void o_rmlocal(long addr
, int sz
)
461 long o_arg2loc(int i
)
466 #define MOVXX(bt) ((BT_SZ(bt) == LONGSZ ? O_MOV : ((bt) & BT_SIGNED ? O_SX : O_ZX)))
468 void o_assign(unsigned bt
)
470 struct tmp
*t1
= TMP(0);
471 struct tmp
*t2
= TMP(1);
472 int r1
= reg_tmp(t1
, BT_SZ(bt
) > 1 ? R_TMPS
: R_BYTE
, 1);
473 int r2
= reg_tmpn(t2
, 1 << r1
, 1);
476 if (TMP_ISLREG(t2
)) {
477 i_op_imm(MOVXX(bt
), TMP_LREG(t2
), r1
, BT_SZ(bt
) * 8);
482 if (t2
->loc
== LOC_LOCAL
) {
484 off
= t2
->addr
+ t2
->off
;
485 locals
[t2
->id
].n_access
++;
489 i_save(r1
, r2
, off
, bt
);
495 static long cu(int op
, long i
)
508 static int c_uop(int op
)
510 struct tmp
*t1
= TMP(0);
514 o_num(cu(op
, t1
->addr
));
518 static long cb(int op
, long a
, long b
)
543 return (unsigned long) a
>> b
;
560 static int c_bop(int op
)
562 struct tmp
*t1
= TMP(0);
563 struct tmp
*t2
= TMP(1);
564 int locs
= LOCAL_PTR(t1
) + LOCAL_PTR(t2
);
565 int syms
= SYM_PTR(t1
) + SYM_PTR(t2
);
566 int nums
= TMP_NUM(t1
) + TMP_NUM(t2
);
567 if (syms
+ locs
== 2 || syms
+ nums
+ locs
!= 2)
570 if ((op
& 0xff) != O_ADD
&& ((op
& 0xff) != O_SUB
|| TMP_NUM(t2
)))
573 long o1
= TMP_NUM(t1
) ? t1
->addr
: t1
->off
;
574 long o2
= TMP_NUM(t2
) ? t2
->addr
: t2
->off
;
575 long ret
= cb(op
, o2
, o1
);
578 if (t2
->loc
== LOC_LOCAL
)
579 locals
[t2
->id
].n_addr
++;
583 long ret
= cb(op
, t2
->addr
, t1
->addr
);
590 /* allocate registers for the given binary or unary instruction */
591 static void regs2(int op
, int *rd
, int *r1
, int *r2
)
596 i_reg(op
, &md
, &m1
, &m2
, &mt
);
598 struct tmp
*t2
= TMP(0);
599 *r2
= reg_tmp(t2
, m2
, 1);
604 struct tmp
*t1
= TMP(m2
? 1 : 0);
605 *r1
= reg_tmp(t1
, m1
& ~all
, md
? 1 : 0);
610 if (m2
&& md
& tmp_mask
& (1 << *r2
))
612 else if (m1
&& md
& tmp_mask
& (1 << *r1
))
615 *rd
= reg_get(md
& ~all
);
621 for (i
= 0; i
< N_TMPS
; i
++)
622 if (mt
& ~all
& (1 << tmpregs
[i
]))
623 reg_free(tmpregs
[i
], all
| mt
);
626 tmp_drop(m2
? 2 : 1);
629 /* allocate registers for a 3 operand instruction */
630 static void regs3(int op
, int *r0
, int *r1
, int *r2
)
633 struct tmp
*t0
= TMP(2);
634 struct tmp
*t1
= TMP(1);
635 struct tmp
*t2
= TMP(0);
638 i_reg(op
, &m0
, &m1
, &m2
, &mt
);
640 *r2
= reg_tmp(t2
, m2
, 1);
645 *r1
= reg_tmp(t1
, m1
& ~(1 << *r2
), 1);
650 *r0
= reg_tmp(t0
, m0
& ~((1 << *r2
) | (1 << *r1
)), 1);
655 for (i
= 0; i
< N_TMPS
; i
++)
656 if (mt
& ~all
& (1 << tmpregs
[i
]))
657 reg_free(tmpregs
[i
], all
| mt
);
663 static void op_imm(int op
, long n
)
666 regs2(op
| O_IMM
, &rd
, &r1
, &r2
);
667 i_op_imm(op
| O_IMM
, rd
, r1
, n
);
676 regs2(op
, &rd
, &r1
, &r2
);
677 i_op(op
, rd
, r1
, r2
);
681 static int bop_imm(int op
, long *n
, int swap
)
683 struct tmp
*t1
= TMP(0);
684 struct tmp
*t2
= TMP(1);
685 if (!TMP_NUM(t1
) && (!swap
|| !TMP_NUM(t2
)))
687 *n
= TMP_NUM(t1
) ? t1
->addr
: t2
->addr
;
696 static void bin_op(int op
, int swap
)
700 if (!bop_imm(op
, &n
, swap
)) {
701 regs2(op
| O_IMM
, &rd
, &r1
, &r2
);
702 i_op_imm(op
, rd
, r1
, n
);
704 regs2(op
, &rd
, &r1
, &r2
);
705 i_op(op
, rd
, r1
, r2
);
710 static int log2a(unsigned long n
)
713 for (i
= 0; i
< LONGSZ
* 8; i
++)
716 if (i
== LONGSZ
* 8 || !(n
>> (i
+ 1)))
721 /* optimized version of mul/div/mod for powers of two */
722 static int mul_2(int op
)
724 struct tmp
*t1
= TMP(0);
725 struct tmp
*t2
= TMP(1);
728 if ((op
& 0xff) == O_MUL
&& t2
->loc
== LOC_NUM
&& !t2
->bt
)
730 if (t1
->loc
!= LOC_NUM
|| t1
->bt
)
736 if ((op
& 0xff) == O_MUL
) {
752 op_imm((op
& O_SIGNED
) | O_SHR
, p
);
772 if ((op
& 0xf0) == 0x00) /* add */
773 bin_op(op
, (op
& 0xff) != O_SUB
);
774 if ((op
& 0xf0) == 0x10) /* shx */
776 if ((op
& 0xf0) == 0x20) { /* mul */
779 bin_op(op
, (op
& 0xff) == O_MUL
);
781 if ((op
& 0xf0) == 0x30)
782 bin_op(op
, (op
& 0xff) == O_EQ
|| (op
& 0xff) == O_NEQ
);
788 regs3(O_MCPY
, &r0
, &r1
, &r2
);
789 i_memcpy(r0
, r1
, r2
);
795 regs3(O_MSET
, &r0
, &r1
, &r2
);
796 i_memset(r0
, r1
, r2
);
799 void o_cast(unsigned bt
)
801 struct tmp
*t
= TMP(0);
802 if (!t
->bt
&& t
->loc
== LOC_NUM
) {
806 if (BT_SZ(bt
) != LONGSZ
)
807 op_imm(MOVXX(bt
), BT_SZ(bt
) * 8);
810 static void jxz(int id
, int z
)
812 int r
= reg_tmp(TMP(0), R_TMPS
, 1);
832 void o_call(int argc
, int rets
)
836 int aregs
= MIN(N_ARGS
, argc
);
837 for (i
= 0; i
< N_TMPS
; i
++)
838 if (regs
[tmpregs
[i
]] && regs
[tmpregs
[i
]] - tmps
< ntmp
- argc
)
839 tmp_mem(regs
[tmpregs
[i
]]);
841 sp_push(LONGSZ
* (argc
- aregs
));
842 for (i
= argc
- 1; i
>= aregs
; --i
) {
843 int reg
= reg_tmp(TMP(0), R_TMPS
, 1);
845 i_save(reg
, REG_SP
, (i
- aregs
) * LONGSZ
, LONGSZ
);
848 for (i
= aregs
- 1; i
>= 0; --i
)
849 tmp_to(TMP(aregs
- i
- 1), argregs
[i
]);
852 if (t
->loc
== LOC_SYM
&& !t
->bt
) {
853 i_call(t
->sym
, t
->off
);
856 int reg
= reg_tmp(t
, R_TMPS
, 1);
865 void o_mkbss(char *name
, int size
, int global
)
869 out_sym(name
, OUT_BSS
| (global
? OUT_GLOB
: 0), bsslen
, size
);
870 bsslen
+= ALIGN(size
, OUT_ALIGNMENT
);
873 #define MAXDATS (1 << 10)
874 static char dat_names
[MAXDATS
][NAMELEN
];
875 static int dat_offs
[MAXDATS
];
878 void *o_mkdat(char *name
, int size
, int global
)
880 void *addr
= ds
+ dslen
;
886 err("nomem: MAXDATS reached!\n");
887 strcpy(dat_names
[idx
], name
);
888 dat_offs
[idx
] = dslen
;
889 out_sym(name
, OUT_DS
| (global
? OUT_GLOB
: 0), dslen
, size
);
890 dslen
+= ALIGN(size
, OUT_ALIGNMENT
);
894 static int dat_off(char *name
)
897 for (i
= 0; i
< ndats
; i
++)
898 if (!strcmp(name
, dat_names
[i
]))
903 void o_datset(char *name
, int off
, unsigned bt
)
905 struct tmp
*t
= TMP(0);
906 int sym_off
= dat_off(name
) + off
;
911 if (t
->loc
== LOC_NUM
&& !t
->bt
) {
913 memcpy(ds
+ sym_off
, &t
->addr
, BT_SZ(bt
));
915 if (t
->loc
== LOC_SYM
&& !t
->bt
) {
916 out_rel(t
->sym
, OUT_DS
, sym_off
);
917 memcpy(ds
+ sym_off
, &t
->off
, BT_SZ(bt
));
925 out_write(fd
, cs
, cslen
, ds
, dslen
);
928 static void opt_reset(void)
931 memset(opt_isreg
, 0, sizeof(opt_isreg
));
932 opt_sargs
= func_varg
? R_ARGS
: 0;
933 for (i
= MIN(func_argc
, N_ARGS
) - 1; i
>= 0; --i
)
934 opt_sargs
|= 1 << argregs
[i
];
935 tmp_mask
= N_TMPS
> 6 ? R_TMPS
& ~R_SAVED
: R_TMPS
;
941 static void func_reset(void)
945 memset(regs
, 0, sizeof(regs
));
946 memset(locals
, 0, sizeof(*locals
) * nlocals
);
956 stat_regs
= 1 << REG_RET
;
957 for (i
= 0; i
< func_argc
; i
++) {
958 locals
[nlocals
].loc
= i_args() + argaddr
;
959 locals
[nlocals
].sz
= LONGSZ
;
961 if (i
>= N_ARGS
|| opt_sargs
& (1 << argregs
[i
]))
966 void o_func_beg(char *name
, int argc
, int global
, int varg
)
971 out_sym(name
, (global
? OUT_GLOB
: 0) | OUT_CS
, cslen
, 0);
973 i_prolog(argc
, varg
, opt_sargs
, tmp_mask
& R_SAVED
, 1, 1);
977 /* sort locals for register allocation based on the number of accesses */
978 static int *sortedlocals(void)
980 static int ord
[MAXLOCALS
];
982 for (i
= 0; i
< nlocals
; i
++) {
983 for (j
= i
- 1; j
>= 0; j
--) {
984 if (locals
[i
].n_access
<= locals
[ord
[j
]].n_access
)
993 /* assign locals to registers */
994 static int locals2regs(int leaf
)
996 int *ord
= sortedlocals();
1000 /* letting arguments stay in their registers for leaf functions */
1001 if (!func_varg
&& leaf
) {
1002 for (i
= 0; i
< MIN(func_argc
, N_ARGS
); i
++) {
1003 if (locals
[i
].sz
> LONGSZ
|| (1 << argregs
[i
]) & stat_regs
)
1005 if (locals
[i
].n_access
&& !locals
[i
].n_addr
) {
1007 opt_lreg
[i
] = argregs
[i
];
1008 opt_sargs
&= ~(1 << argregs
[i
]);
1009 opt_lregs
|= (1 << argregs
[i
]);
1014 /* try finding a register for each local */
1015 for (i
= 0; i
< nlocals
; i
++) {
1017 int nmask
= (leaf
? 0 : ~R_SAVED
) | stat_regs
| opt_lregs
;
1018 if (opt_isreg
[l
] || (func_varg
&& l
< func_argc
))
1020 /* find a free register */
1021 while (idx
< N_TMPS
&& ((1 << tmpregs
[idx
]) & nmask
))
1025 if (locals
[l
].sz
> LONGSZ
|| locals
[l
].n_addr
)
1027 if (locals
[l
].n_access
> (leaf
? 0 : 1)) {
1029 opt_lreg
[l
] = tmpregs
[idx
];
1030 opt_lregs
|= 1 << tmpregs
[idx
];
1031 if (l
< MIN(N_ARGS
, func_argc
))
1032 opt_sargs
&= ~(1 << argregs
[l
]);
1048 int initfp
, subsp
, sregs
;
1054 locregs
= locals2regs(leaf
);
1055 subsp
= nlocals
> locregs
|| !leaf
;
1056 initfp
= subsp
|| stat_tmps
|| func_argc
> N_ARGS
;
1057 sregs
= (opt_lregs
| stat_regs
) & R_SAVED
;
1058 tmp_mask
= stat_regs
;
1062 for (i
= 0; i
< func_argc
; i
++)
1063 if (i
< N_ARGS
&& (locals
[i
].n_access
+ locals
[i
].n_addr
) == 0)
1064 opt_sargs
&= ~(1 << argregs
[i
]);
1065 i_prolog(func_argc
, func_varg
, opt_sargs
, sregs
, initfp
, subsp
);
1067 for (i
= 0; i
< MIN(func_argc
, N_ARGS
); i
++)
1068 if (opt_isreg
[i
] && opt_lreg
[i
] != argregs
[i
])
1069 i_mov(opt_lreg
[i
], argregs
[i
]);
1070 for (i
= N_ARGS
; i
< func_argc
; i
++)
1072 i_load(opt_lreg
[i
], REG_FP
, locals
[i
].loc
, LONGSZ
);
1075 void o_func_end(void)