1 /* neatcc code generation */
7 static struct mem ds
; /* data segment */
8 static struct mem cs
; /* code segment */
9 static long bsslen
; /* bss segment size */
10 static struct ic
*ic
; /* current instruction stream */
11 static long ic_n
; /* number of instructions in ic[] */
12 static long ic_i
; /* current instruction */
13 static long *ic_luse
; /* last instruction in which values are used */
15 static long *loc_off
; /* offset of locals on the stack */
16 static long loc_n
, loc_sz
; /* number of locals */
17 static long loc_pos
; /* current stack position */
18 static int *loc_mem
; /* local was accessed on the stack */
20 static char (*ds_name
)[NAMELEN
];/* data section symbols */
21 static long *ds_off
; /* data section offsets */
22 static long ds_n
, ds_sz
; /* number of data section symbols */
24 static int func_argc
; /* number of arguments */
25 static int func_varg
; /* varargs */
26 static int func_regs
; /* used registers */
27 static int func_maxargs
; /* the maximum number of arguments on the stack */
28 static long *ic_bbeg
; /* whether each instruction begins a basic block */
30 static long ra_vmap
[N_REGS
]; /* register to intermediate value assignments */
31 static long ra_lmap
[N_REGS
]; /* register to local assignments */
32 static long *ra_gmask
; /* the mask of good registers for each value */
33 static long ra_live
[NTMPS
]; /* live values */
34 static int ra_vmax
; /* the number of values stored on the stack */
36 static long loc_add(long pos
)
38 if (loc_n
>= loc_sz
) {
39 loc_sz
= MAX(128, loc_sz
* 2);
40 loc_off
= mextend(loc_off
, loc_n
, loc_sz
, sizeof(loc_off
[0]));
46 long o_mklocal(long sz
)
48 loc_pos
+= ALIGN(sz
, ULNG
);
49 return loc_add(loc_pos
);
52 void o_rmlocal(long addr
, long sz
)
61 void o_bsnew(char *name
, long size
, int global
)
63 out_def(name
, OUT_BSS
| (global
? OUT_GLOB
: 0), bsslen
, size
);
64 bsslen
+= ALIGN(size
, OUT_ALIGNMENT
);
67 long o_dsnew(char *name
, long size
, int global
)
71 ds_sz
= MAX(128, ds_sz
* 2);
72 ds_name
= mextend(ds_name
, ds_n
, ds_sz
, sizeof(ds_name
[0]));
73 ds_off
= mextend(ds_off
, ds_n
, ds_sz
, sizeof(ds_off
[0]));
76 strcpy(ds_name
[idx
], name
);
77 ds_off
[idx
] = mem_len(&ds
);
78 out_def(name
, OUT_DS
| (global
? OUT_GLOB
: 0), mem_len(&ds
), size
);
79 mem_putz(&ds
, ALIGN(size
, OUT_ALIGNMENT
));
83 void o_dscpy(long addr
, void *buf
, long len
)
85 mem_cpy(&ds
, addr
, buf
, len
);
88 static int dat_off(char *name
)
91 for (i
= 0; i
< ds_n
; i
++)
92 if (!strcmp(name
, ds_name
[i
]))
97 void o_dsset(char *name
, long off
, long bt
)
99 long sym_off
= dat_off(name
) + off
;
100 long num
, roff
, rsym
;
101 if (!o_popnum(&num
)) {
102 mem_cpy(&ds
, sym_off
, &num
, T_SZ(bt
));
105 if (!o_popsym(&rsym
, &roff
)) {
106 out_rel(rsym
, OUT_DS
, sym_off
);
107 mem_cpy(&ds
, sym_off
, &roff
, T_SZ(bt
));
111 static int ra_vreg(int val
)
114 for (i
= 0; i
< LEN(ra_vmap
); i
++)
115 if (ra_vmap
[i
] == val
)
120 static int ra_lreg(int loc
)
123 for (i
= 0; i
< LEN(ra_lmap
); i
++)
124 if (ra_lmap
[i
] == loc
)
129 /* mask of registers assigned to locals */
130 static long ra_lmask(void)
134 for (i
= 0; i
< LEN(ra_lmap
); i
++)
140 /* mask of registers assigned to values */
141 static long ra_vmask(void)
145 for (i
= 0; i
< LEN(ra_vmap
); i
++)
151 /* find a temporary register specified in the given mask */
152 static long ra_regscn(long mask
)
155 for (i
= 0; i
< N_TMPS
; i
++)
156 if ((1 << tmpregs
[i
]) & mask
)
161 /* find a register, with the given good, acceptable, and bad register masks */
162 static long ra_regget(long iv
, long gmask
, long amask
, long bmask
)
165 gmask
&= ~bmask
& amask
;
167 if (ra_vreg(iv
) >= 0 && (1 << ra_vreg(iv
)) & (gmask
| amask
))
171 if (ra_regscn(gmask
& ~vmask
& ~lmask
) >= 0)
172 return ra_regscn(gmask
& ~vmask
& ~lmask
);
173 if (ra_regscn(amask
& ~vmask
& ~lmask
) >= 0)
174 return ra_regscn(amask
& ~vmask
& ~lmask
);
175 if (ra_regscn(gmask
) >= 0)
176 return ra_regscn(gmask
);
177 if (ra_regscn(amask
) >= 0)
178 return ra_regscn(amask
);
179 die("neatcc: cannot allocate an acceptable register\n");
183 /* find a free and cheap register */
184 static long ra_regcheap(long mask
)
186 return ra_regscn(mask
& (func_regs
| (R_TMPS
& ~R_PERM
)) &
187 ~ra_lmask() & ~ra_vmask());
190 /* allocate registers for the current instruction */
191 static void ra_map(int *r0
, int *r1
, int *r2
, long *mt
)
194 struct ic
*c
= &ic
[ic_i
];
196 int n
= ic_regcnt(c
);
203 /* optimizing loading locals: point to local's register */
204 if (oc
== (O_LD
| O_LOC
) && ra_lreg(c
->arg1
) >= 0 &&
205 ra_vmap
[ra_lreg(c
->arg1
)] < 0) {
206 *r0
= ra_lreg(c
->arg1
);
207 func_regs
|= 1 << *r0
;
210 /* do not use argument registers to hold call destination */
212 for (i
= 0; i
< MIN(c
->arg2
, N_ARGS
); i
++)
213 all
|= (1 << argregs
[i
]);
214 /* instructions on locals can be simplified */
218 if (oc
& (O_ST
| O_LD
))
219 oc
= (oc
& ~O_LOC
) & O_NUM
;
221 if (i_reg(c
->op
, &m0
, &m1
, &m2
, mt
))
222 die("neatcc: instruction %08lx not supported\n", c
->op
);
224 * the registers used in global register allocation should not
225 * be used in the last instruction of a basic block.
227 if (c
->op
& (O_JZ
| O_JCC
))
228 for (i
= 0; i
< LEN(ra_lmap
); i
++)
229 if (reg_rmap(ic_i
, i
) >= 0 && ra_lmap
[i
] != reg_rmap(ic_i
, i
))
231 /* allocating registers for the operands */
233 *r2
= ra_regget(c
->arg2
, m2
, m2
, all
);
237 *r1
= ra_regget(c
->arg1
, m1
, m1
, all
);
241 int wop
= c
->op
& O_OUT
;
242 if (wop
&& n
>= 3 && m0
& (1 << *r2
) && ic_luse
[*r2
] <= ic_i
)
244 else if (wop
&& n
>= 2 && m0
& (1 << *r1
) && ic_luse
[*r1
] <= ic_i
)
247 *r0
= ra_regget(c
->arg0
, ra_gmask
[c
->arg0
],
252 /* if r0 is overwritten and it is a local; use another register */
253 if (n
>= 1 && oc
& O_OUT
&& ra_lmap
[*r0
] >= 0) {
254 long m3
= (m0
? m0
: m1
) & ~(all
| (1 << *r0
));
255 long arg3
= m0
? c
->arg0
: c
->arg1
;
257 int r3
= ra_regget(arg3
, ra_gmask
[c
->arg0
], m3
, 0);
258 if (n
>= 2 && *r0
== *r1
)
260 if (n
>= 3 && *r0
== *r2
)
267 func_regs
|= all
| *mt
;
270 static long iv_rank(long iv
)
273 for (i
= 0; i
< LEN(ra_live
); i
++)
274 if (ra_live
[i
] == iv
)
276 die("neatcc: the specified value is not live\n");
280 static long iv_addr(long rank
)
282 return loc_pos
+ rank
* ULNG
+ ULNG
;
285 static void loc_toreg(long loc
, long off
, int reg
, int bt
)
288 i_ins(O_MK(O_LD
| O_NUM
, bt
), reg
, REG_FP
, -loc_off
[loc
] + off
);
291 static void loc_tomem(long loc
, long off
, int reg
, int bt
)
294 i_ins(O_MK(O_ST
| O_NUM
, bt
), reg
, REG_FP
, -loc_off
[loc
] + off
);
297 static void loc_toadd(long loc
, long off
, int reg
)
300 i_ins(O_ADD
| O_NUM
, reg
, REG_FP
, -loc_off
[loc
] + off
);
303 /* return nonzero if the local is read at least once in this basic block */
304 static int loc_isread(long loc
)
307 for (i
= ic_i
+ 1; i
< ic_n
&& !ic_bbeg
[i
]; i
++)
308 if (ic
[i
].op
& O_LOC
)
309 return ic
[i
].op
& O_LD
;
313 static void val_toreg(long val
, int reg
)
315 i_ins(O_MK(O_LD
| O_NUM
, ULNG
), reg
, REG_FP
, -iv_addr(iv_rank(val
)));
318 static void val_tomem(long val
, int reg
)
320 long rank
= iv_rank(val
);
321 ra_vmax
= MAX(ra_vmax
, rank
+ 1);
322 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), reg
, REG_FP
, -iv_addr(rank
));
325 /* move the value to the stack */
326 static void ra_spill(int reg
)
328 if (ra_vmap
[reg
] >= 0) {
329 val_tomem(ra_vmap
[reg
], reg
);
332 if (ra_lmap
[reg
] >= 0) {
333 if (ra_lmap
[reg
] == reg_rmap(ic_i
, reg
))
334 loc_tomem(ra_lmap
[reg
], 0, reg
, ULNG
);
339 /* set the value to the given register */
340 static void ra_vsave(long iv
, int reg
)
344 for (i
= 0; i
< LEN(ra_live
); i
++)
347 if (i
== LEN(ra_live
))
348 die("neatcc: too many live values\n");
352 /* load the value into a register */
353 static void ra_vload(long iv
, int reg
)
355 if (ra_vmap
[reg
] == iv
)
357 if (ra_vmap
[reg
] >= 0 || ra_lmap
[reg
] >= 0)
359 if (ra_vreg(iv
) >= 0) {
360 i_ins(O_MK(O_MOV
, ULNG
), reg
, ra_vreg(iv
), 0);
361 ra_vmap
[ra_vreg(iv
)] = -1;
368 /* the value is no longer needed */
369 static void ra_vdrop(long iv
)
372 for (i
= 0; i
< LEN(ra_live
); i
++)
373 if (ra_live
[i
] == iv
)
375 if (ra_vreg(iv
) >= 0)
376 ra_vmap
[ra_vreg(iv
)] = -1;
379 /* move the given value to memory or a free register */
380 static void ra_vmove(long iv
, long mask
)
382 int src
= ra_vreg(iv
);
383 int dst
= ra_regcheap(mask
);
384 if (dst
>= 0 && ra_vmap
[dst
] < 0 && ra_lmap
[dst
] < 0) {
385 i_ins(O_MK(O_MOV
, ULNG
), dst
, src
, 0);
393 /* load the value of local loc into register reg */
394 static void ra_lload(long loc
, long off
, int reg
, int bt
)
396 int greg
= reg_lmap(ic_i
, loc
);
398 int lreg
= ra_lreg(loc
);
399 /* values using the same register */
400 if (ra_vmap
[greg
] >= 0)
401 ra_vmove(ra_vmap
[greg
],
402 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
406 i_ins(O_MK(O_MOV
, bt
), greg
, lreg
, 0);
408 loc_toreg(loc
, off
, greg
, bt
);
412 if (ra_lreg(loc
) < 0 && reg_safe(loc
) && loc_isread(loc
)) {
414 loc_toreg(loc
, off
, reg
, bt
);
416 if (ra_lreg(loc
) >= 0) {
417 if (ra_lreg(loc
) != reg
)
418 i_ins(O_MK(O_MOV
, bt
), reg
, ra_lreg(loc
), 0);
420 loc_toreg(loc
, off
, reg
, bt
);
424 /* register reg contains the value of local loc */
425 static void ra_lsave(long loc
, long off
, int reg
, int bt
)
427 int lreg
= ra_lreg(loc
);
428 int greg
= reg_lmap(ic_i
, loc
);
430 if (lreg
>= 0 && lreg
!= greg
)
432 if (ra_vmap
[greg
] >= 0) /* values using the same register */
433 ra_vmove(ra_vmap
[greg
],
434 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
435 i_ins(O_MK(O_MOV
, bt
), greg
, reg
, 0);
440 loc_tomem(loc
, off
, reg
, bt
);
441 if (ra_lmap
[reg
] < 0 && reg_safe(loc
) && loc_isread(loc
))
446 /* end of a basic block */
447 static void ra_bbend(void)
450 /* save values to memory */
451 for (i
= 0; i
< LEN(ra_vmap
); i
++)
454 /* dropping local caches */
455 for (i
= 0; i
< LEN(ra_lmap
); i
++)
456 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
) && ra_lmap
[i
] >= 0)
458 /* load global register allocations from memory */
459 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
460 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
)) {
461 ra_lmap
[i
] = reg_rmap(ic_i
, i
);
462 loc_toreg(ra_lmap
[i
], 0, i
, ULNG
);
467 static void ra_init(struct ic
*ic
, long ic_n
)
471 ic_bbeg
= calloc(ic_n
, sizeof(ic_bbeg
[0]));
472 ra_gmask
= calloc(ic_n
, sizeof(ra_gmask
[0]));
473 loc_mem
= calloc(loc_n
, sizeof(loc_mem
[0]));
475 for (i
= 0; i
< ic_n
; i
++) {
476 if (i
+ 1 < ic_n
&& ic
[i
].op
& (O_JXX
| O_RET
))
478 if (ic
[i
].op
& O_JXX
&& ic
[i
].arg2
< ic_n
)
479 ic_bbeg
[ic
[i
].arg2
] = 1;
482 for (i
= 0; i
< ic_n
; i
++) {
483 int n
= ic_regcnt(ic
+ i
);
485 i_reg(op
, &m0
, &m1
, &m2
, &mt
);
486 if (n
>= 1 && !(op
& O_OUT
))
487 ra_gmask
[ic
[i
].arg0
] = m0
;
489 ra_gmask
[ic
[i
].arg1
] = m1
;
491 ra_gmask
[ic
[i
].arg2
] = m2
;
493 for (j
= 0; j
< MIN(N_ARGS
, ic
[i
].arg2
); j
++)
494 ra_gmask
[ic
[i
].args
[j
]] = 1 << argregs
[j
];
497 for (i
= 0; i
< LEN(ra_vmap
); i
++)
500 for (i
= 0; i
< LEN(ra_lmap
); i
++)
501 ra_lmap
[i
] = reg_rmap(0, i
);
502 func_regs
|= reg_mask();
504 for (i
= 0; i
< LEN(ra_live
); i
++)
510 static void ra_done(void)
517 static void ic_gencode(struct ic
*ic
, long ic_n
)
522 /* loading arguments in their allocated registers */
523 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
524 int loc
= ra_lmap
[i
];
525 if (loc
>= 0 && loc
< func_argc
)
526 if (loc
>= N_ARGS
|| i
!= argregs
[loc
])
527 loc_toreg(loc
, 0, i
, ULNG
);
529 /* generating code */
530 for (i
= 0; i
< ic_n
; i
++) {
533 int n
= ic_regcnt(ic
+ i
);
536 ra_map(&r0
, &r1
, &r2
, &mt
);
538 int argc
= ic
[i
].arg2
;
539 int aregs
= MIN(N_ARGS
, argc
);
540 /* arguments passed via stack */
541 for (j
= argc
- 1; j
>= aregs
; --j
) {
542 int v
= ic
[i
].args
[j
];
543 int rx
= ra_vreg(v
) >= 0 ? ra_vreg(v
) : r0
;
545 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), rx
, REG_SP
,
549 func_maxargs
= MAX(func_maxargs
, argc
- aregs
);
550 /* arguments passed via registers */
551 for (j
= aregs
- 1; j
>= 0; --j
)
552 ra_vload(ic
[i
].args
[j
], argregs
[j
]);
554 /* loading the operands */
555 if (n
>= 1 && !(oc
& O_OUT
))
556 ra_vload(ic
[i
].arg0
, r0
);
557 if (n
>= 2 && !(oc
& O_LOC
))
558 ra_vload(ic
[i
].arg1
, r1
);
560 ra_vload(ic
[i
].arg2
, r2
);
561 /* dropping values that are no longer used */
562 for (j
= 0; j
< LEN(ra_live
); j
++)
563 if (ra_live
[j
] >= 0 && ic_luse
[ra_live
[j
]] <= i
)
564 ra_vdrop(ra_live
[j
]);
565 /* saving values stored in registers that may change */
566 for (j
= 0; j
< N_REGS
; j
++)
569 /* overwriting a value that is needed later (unless loading a local to its register) */
570 if (n
>= 1 && oc
& O_OUT
)
571 if (oc
!= (O_LD
| O_LOC
) || ra_lmap
[r0
] != ic
[i
].arg1
||
574 /* before the last instruction of a basic block; for jumps */
575 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && oc
& O_JXX
)
577 /* performing the instruction */
579 i_ins(op
, r0
, r1
, oc
& O_NUM
? ic
[i
].arg2
: r2
);
581 i_ins(op
, r0
, r1
, r2
);
582 if (oc
== (O_LD
| O_NUM
))
583 i_ins(op
, r0
, r1
, ic
[i
].arg2
);
584 if (oc
== (O_LD
| O_LOC
))
585 ra_lload(ic
[i
].arg1
, ic
[i
].arg2
, r0
, O_T(op
));
586 if (oc
== (O_ST
| O_NUM
))
587 i_ins(op
, r0
, r1
, ic
[i
].arg2
);
588 if (oc
== (O_ST
| O_LOC
))
589 ra_lsave(ic
[i
].arg1
, ic
[i
].arg2
, r0
, O_T(op
));
593 i_ins(op
, r0
, r1
, 0);
594 if (oc
== (O_MOV
| O_NUM
))
595 i_ins(op
, r0
, ic
[i
].arg1
, 0);
596 if (oc
== (O_MOV
| O_LOC
))
597 loc_toadd(ic
[i
].arg1
, ic
[i
].arg2
, r0
);
598 if (oc
== (O_MOV
| O_SYM
))
599 i_ins(op
, r0
, ic
[i
].arg1
, ic
[i
].arg2
);
601 i_ins(op
, r0
, r1
, 0);
602 if (oc
== (O_CALL
| O_SYM
))
603 i_ins(op
, r0
, ic
[i
].arg1
, 0);
605 i_ins(op
, 0, 0, ic
[i
].arg2
);
607 i_ins(op
, r0
, 0, ic
[i
].arg2
);
609 i_ins(op
, r0
, oc
& O_NUM
? ic
[i
].arg1
: r1
, ic
[i
].arg2
);
611 i_ins(op
, r0
, r1
, r2
);
613 i_ins(op
, r0
, r1
, r2
);
614 /* saving back the output register */
615 if (oc
& O_OUT
&& ic_luse
[i
] > i
)
616 ra_vsave(ic
[i
].arg0
, r0
);
617 /* after the last instruction of a basic block */
618 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && !(oc
& O_JXX
))
624 static void ic_reset(void)
635 void o_func_beg(char *name
, int argc
, int global
, int varg
)
642 for (i
= 0; i
< argc
; i
++)
643 loc_add(I_ARG0
+ -i
* ULNG
);
644 out_def(name
, (global
? OUT_GLOB
: 0) | OUT_CS
, mem_len(&cs
), 0);
647 void o_code(char *name
, char *c
, long c_len
)
649 out_def(name
, OUT_CS
, mem_len(&cs
), 0);
650 mem_put(&cs
, c
, c_len
);
653 void o_func_end(void)
657 long sargs_last
= -1;
660 long c_len
, *rsym
, *rflg
, *roff
, rcnt
;
662 int locs
= 0; /* accessing locals on the stack */
664 ic_get(&ic
, &ic_n
); /* the intermediate code */
665 reg_init(ic
, ic_n
); /* global register allocation */
666 ra_init(ic
, ic_n
); /* initialize register allocation */
667 ic_luse
= ic_lastuse(ic
, ic_n
);
668 ic_gencode(ic
, ic_n
); /* generating machine code */
670 /* deciding which arguments to save */
671 for (i
= 0; i
< func_argc
; i
++)
674 for (i
= 0; i
< N_ARGS
&& (func_varg
|| i
< sargs_last
); i
++)
675 sargs
|= 1 << argregs
[i
];
676 /* computing the amount of stack subtraction */
677 for (i
= 0; i
< loc_n
; i
++)
680 spsub
= (locs
|| ra_vmax
) ? loc_pos
+ ra_vmax
* ULNG
: 0;
681 for (i
= 0; i
< N_TMPS
; i
++)
682 if (((1 << tmpregs
[i
]) & func_regs
& R_PERM
) != 0)
685 spsub
+= func_maxargs
* ULNG
;
687 for (i
= 0; i
< ic_n
; i
++)
688 if (ic
[i
].op
& O_CALL
)
690 /* adding function prologue and epilogue */
691 i_wrap(func_argc
, sargs
, spsub
, spsub
|| locs
|| !leaf
,
692 func_regs
& R_PERM
, -sregs_pos
);
694 i_code(&c
, &c_len
, &rsym
, &rflg
, &roff
, &rcnt
);
695 for (i
= 0; i
< rcnt
; i
++) /* adding the relocations */
696 out_rel(rsym
[i
], rflg
[i
], roff
[i
] + mem_len(&cs
));
697 mem_put(&cs
, c
, c_len
); /* appending function code */
702 for (i
= 0; i
< ic_n
; i
++)
712 out_write(fd
, mem_buf(&cs
), mem_len(&cs
), mem_buf(&ds
), mem_len(&ds
));