1 /* neatcc code generation */
7 static struct mem ds
; /* data segment */
8 static struct mem cs
; /* code segment */
9 static long bsslen
; /* bss segment size */
10 static struct ic
*ic
; /* current instruction stream */
11 static long ic_n
; /* number of instructions in ic[] */
12 static long ic_i
; /* current instruction */
13 static long *ic_luse
; /* last instruction in which values are used */
15 static long *loc_off
; /* offset of locals on the stack */
16 static long loc_n
, loc_sz
; /* number of locals */
17 static long loc_pos
; /* current stack position */
18 static int *loc_mem
; /* local was accessed on the stack */
20 static char (*ds_name
)[NAMELEN
];/* data section symbols */
21 static long *ds_off
; /* data section offsets */
22 static long ds_n
, ds_sz
; /* number of data section symbols */
24 static int func_argc
; /* number of arguments */
25 static int func_varg
; /* varargs */
26 static int func_regs
; /* used registers */
27 static int func_maxargs
; /* the maximum number of arguments on the stack */
28 static long *ic_bbeg
; /* whether each instruction begins a basic block */
30 static long ra_vmap
[N_REGS
]; /* register to intermediate value assignments */
31 static long ra_lmap
[N_REGS
]; /* register to local assignments */
32 static long *ra_gmask
; /* the mask of good registers for each value */
33 static long ra_live
[NTMPS
]; /* live values */
34 static int ra_vmax
; /* the number of values stored on the stack */
36 static long loc_add(long pos
)
38 if (loc_n
>= loc_sz
) {
39 loc_sz
= MAX(128, loc_sz
* 2);
40 loc_off
= mextend(loc_off
, loc_n
, loc_sz
, sizeof(loc_off
[0]));
46 long o_mklocal(long sz
)
48 loc_pos
+= ALIGN(sz
, ULNG
);
49 return loc_add(loc_pos
);
52 void o_rmlocal(long addr
, long sz
)
61 void o_bsnew(char *name
, long size
, int global
)
63 out_def(name
, OUT_BSS
| (global
? OUT_GLOB
: 0), bsslen
, size
);
64 bsslen
+= ALIGN(size
, OUT_ALIGNMENT
);
67 long o_dsnew(char *name
, long size
, int global
)
71 ds_sz
= MAX(128, ds_sz
* 2);
72 ds_name
= mextend(ds_name
, ds_n
, ds_sz
, sizeof(ds_name
[0]));
73 ds_off
= mextend(ds_off
, ds_n
, ds_sz
, sizeof(ds_off
[0]));
76 strcpy(ds_name
[idx
], name
);
77 ds_off
[idx
] = mem_len(&ds
);
78 out_def(name
, OUT_DS
| (global
? OUT_GLOB
: 0), mem_len(&ds
), size
);
79 mem_putz(&ds
, ALIGN(size
, OUT_ALIGNMENT
));
83 void o_dscpy(long addr
, void *buf
, long len
)
85 mem_cpy(&ds
, addr
, buf
, len
);
88 static int dat_off(char *name
)
91 for (i
= 0; i
< ds_n
; i
++)
92 if (!strcmp(name
, ds_name
[i
]))
97 void o_dsset(char *name
, long off
, long bt
)
99 long sym_off
= dat_off(name
) + off
;
100 long num
, roff
, rsym
;
101 if (!o_popnum(&num
)) {
102 mem_cpy(&ds
, sym_off
, &num
, T_SZ(bt
));
105 if (!o_popsym(&rsym
, &roff
)) {
106 out_rel(rsym
, OUT_DS
, sym_off
);
107 mem_cpy(&ds
, sym_off
, &roff
, T_SZ(bt
));
111 static int ra_vreg(int val
)
114 for (i
= 0; i
< LEN(ra_vmap
); i
++)
115 if (ra_vmap
[i
] == val
)
120 static int ra_lreg(int loc
)
123 for (i
= 0; i
< LEN(ra_lmap
); i
++)
124 if (ra_lmap
[i
] == loc
)
129 /* mask of registers assigned to locals */
130 static long ra_lmask(void)
134 for (i
= 0; i
< LEN(ra_lmap
); i
++)
140 /* mask of registers assigned to values */
141 static long ra_vmask(void)
145 for (i
= 0; i
< LEN(ra_vmap
); i
++)
151 /* find a temporary register specified in the given mask */
152 static long ra_regscn(long mask
)
155 for (i
= 0; i
< N_TMPS
; i
++)
156 if ((1 << tmpregs
[i
]) & mask
)
161 /* find a register, with the given good, acceptable, and bad register masks */
162 static long ra_regget(long iv
, long gmask
, long amask
, long bmask
)
165 gmask
&= ~bmask
& amask
;
167 if (ra_vreg(iv
) >= 0 && (1 << ra_vreg(iv
)) & (gmask
| amask
))
171 if (ra_regscn(gmask
& ~vmask
& ~lmask
) >= 0)
172 return ra_regscn(gmask
& ~vmask
& ~lmask
);
173 if (ra_regscn(amask
& ~vmask
& ~lmask
) >= 0)
174 return ra_regscn(amask
& ~vmask
& ~lmask
);
175 if (ra_regscn(gmask
) >= 0)
176 return ra_regscn(gmask
);
177 if (ra_regscn(amask
) >= 0)
178 return ra_regscn(amask
);
179 die("neatcc: cannot allocate an acceptable register\n");
183 /* find a free and cheap register */
184 static long ra_regcheap(long mask
)
186 return ra_regscn(mask
& (func_regs
| (R_TMPS
& ~R_PERM
)) &
187 ~ra_lmask() & ~ra_vmask());
190 /* allocate registers for the current instruction */
191 static void ra_map(int *rd
, int *r1
, int *r2
, int *r3
, long *mt
)
194 struct ic
*c
= &ic
[ic_i
];
196 int n
= ic_regcnt(c
);
204 /* optimizing loading locals: point to local's register */
205 if (oc
== (O_LD
| O_LOC
) && ra_lreg(c
->a1
) >= 0 &&
206 ra_vmap
[ra_lreg(c
->a1
)] < 0) {
207 *rd
= ra_lreg(c
->a1
);
208 func_regs
|= 1 << *rd
;
211 /* do not use argument registers to hold call destination */
213 for (i
= 0; i
< MIN(c
->a3
, N_ARGS
); i
++)
214 all
|= (1 << argregs
[i
]);
215 /* instructions on locals can be simplified */
219 if (oc
& (O_ST
| O_LD
))
220 oc
= (oc
& ~O_LOC
) & O_NUM
;
222 if (i_reg(c
->op
, &md
, &m1
, &m2
, &m3
, mt
))
223 die("neatcc: instruction %08lx not supported\n", c
->op
);
225 * the registers used in global register allocation should not
226 * be used in the last instruction of a basic block.
228 if (c
->op
& (O_JZ
| O_JCC
))
229 for (i
= 0; i
< LEN(ra_lmap
); i
++)
230 if (reg_rmap(ic_i
, i
) >= 0 && ra_lmap
[i
] != reg_rmap(ic_i
, i
))
232 /* allocating registers for the operands */
234 *r2
= ra_regget(c
->a2
, m2
, m2
, all
);
238 *r1
= ra_regget(c
->a1
, m1
, m1
, all
);
242 *r3
= ra_regget(c
->a3
, m3
, m3
, all
);
248 else if (n
>= 2 && md
& (1 << *r2
) && ic_luse
[*r2
] <= ic_i
)
250 else if (n
>= 1 && md
& (1 << *r1
) && ic_luse
[*r1
] <= ic_i
)
253 *rd
= ra_regget(ic_i
, ra_gmask
[ic_i
], md
, 0);
255 /* if r0 is overwritten and it is a local; use another register */
256 if (oc
& O_OUT
&& ra_lmap
[*rd
] >= 0) {
257 long m4
= (md
? md
: m1
) & ~(all
| (1 << *rd
));
258 long a4
= md
? ic_i
: c
->a1
;
260 int r4
= ra_regget(a4
, ra_gmask
[ic_i
], m4
, 0);
261 if (n
>= 1 && *rd
== *r1
)
263 if (n
>= 2 && *rd
== *r2
)
270 func_regs
|= all
| *mt
;
273 static long iv_rank(long iv
)
276 for (i
= 0; i
< LEN(ra_live
); i
++)
277 if (ra_live
[i
] == iv
)
279 die("neatcc: the specified value is not live\n");
283 static long iv_addr(long rank
)
285 return loc_pos
+ rank
* ULNG
+ ULNG
;
288 static void loc_toreg(long loc
, long off
, int reg
, int bt
)
291 i_ins(O_MK(O_LD
| O_NUM
, bt
), reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
294 static void loc_tomem(long loc
, long off
, int reg
, int bt
)
297 i_ins(O_MK(O_ST
| O_NUM
, bt
), 0, reg
, REG_FP
, -loc_off
[loc
] + off
);
300 static void loc_toadd(long loc
, long off
, int reg
)
303 i_ins(O_ADD
| O_NUM
, reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
306 /* return nonzero if the local is read at least once in this basic block */
307 static int loc_isread(long loc
)
310 for (i
= ic_i
+ 1; i
< ic_n
&& !ic_bbeg
[i
]; i
++)
311 if (ic
[i
].op
& O_LOC
)
312 return ic
[i
].op
& O_LD
;
316 static void val_toreg(long val
, int reg
)
318 i_ins(O_MK(O_LD
| O_NUM
, ULNG
), reg
, REG_FP
, -iv_addr(iv_rank(val
)), 0);
321 static void val_tomem(long val
, int reg
)
323 long rank
= iv_rank(val
);
324 ra_vmax
= MAX(ra_vmax
, rank
+ 1);
325 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0, reg
, REG_FP
, -iv_addr(rank
));
328 /* move the value to the stack */
329 static void ra_spill(int reg
)
331 if (ra_vmap
[reg
] >= 0) {
332 val_tomem(ra_vmap
[reg
], reg
);
335 if (ra_lmap
[reg
] >= 0) {
336 if (ra_lmap
[reg
] == reg_rmap(ic_i
, reg
))
337 loc_tomem(ra_lmap
[reg
], 0, reg
, ULNG
);
342 /* set the value to the given register */
343 static void ra_vsave(long iv
, int reg
)
347 for (i
= 0; i
< LEN(ra_live
); i
++)
350 if (i
== LEN(ra_live
))
351 die("neatcc: too many live values\n");
355 /* load the value into a register */
356 static void ra_vload(long iv
, int reg
)
358 if (ra_vmap
[reg
] == iv
)
360 if (ra_vmap
[reg
] >= 0 || ra_lmap
[reg
] >= 0)
362 if (ra_vreg(iv
) >= 0) {
363 i_ins(O_MK(O_MOV
, ULNG
), reg
, ra_vreg(iv
), 0, 0);
364 ra_vmap
[ra_vreg(iv
)] = -1;
371 /* the value is no longer needed */
372 static void ra_vdrop(long iv
)
375 for (i
= 0; i
< LEN(ra_live
); i
++)
376 if (ra_live
[i
] == iv
)
378 if (ra_vreg(iv
) >= 0)
379 ra_vmap
[ra_vreg(iv
)] = -1;
382 /* move the given value to memory or a free register */
383 static void ra_vmove(long iv
, long mask
)
385 int src
= ra_vreg(iv
);
386 int dst
= ra_regcheap(mask
);
387 if (dst
>= 0 && ra_vmap
[dst
] < 0 && ra_lmap
[dst
] < 0) {
388 i_ins(O_MK(O_MOV
, ULNG
), dst
, src
, 0, 0);
396 /* load the value of local loc into register reg */
397 static void ra_lload(long loc
, long off
, int reg
, int bt
)
399 int greg
= reg_lmap(ic_i
, loc
);
401 int lreg
= ra_lreg(loc
);
402 /* values using the same register */
403 if (ra_vmap
[greg
] >= 0)
404 ra_vmove(ra_vmap
[greg
],
405 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
409 i_ins(O_MK(O_MOV
, bt
), greg
, lreg
, 0, 0);
411 loc_toreg(loc
, off
, greg
, bt
);
415 if (ra_lreg(loc
) < 0 && reg_safe(loc
) && loc_isread(loc
)) {
417 loc_toreg(loc
, off
, reg
, bt
);
419 if (ra_lreg(loc
) >= 0) {
420 if (ra_lreg(loc
) != reg
)
421 i_ins(O_MK(O_MOV
, bt
), reg
, ra_lreg(loc
), 0, 0);
423 loc_toreg(loc
, off
, reg
, bt
);
427 /* register reg contains the value of local loc */
428 static void ra_lsave(long loc
, long off
, int reg
, int bt
)
430 int lreg
= ra_lreg(loc
);
431 int greg
= reg_lmap(ic_i
, loc
);
433 if (lreg
>= 0 && lreg
!= greg
)
435 if (ra_vmap
[greg
] >= 0) /* values using the same register */
436 ra_vmove(ra_vmap
[greg
],
437 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
438 i_ins(O_MK(O_MOV
, bt
), greg
, reg
, 0, 0);
443 loc_tomem(loc
, off
, reg
, bt
);
444 if (ra_lmap
[reg
] < 0 && reg_safe(loc
) && loc_isread(loc
))
449 /* end of a basic block */
450 static void ra_bbend(void)
453 /* save values to memory */
454 for (i
= 0; i
< LEN(ra_vmap
); i
++)
457 /* dropping local caches */
458 for (i
= 0; i
< LEN(ra_lmap
); i
++)
459 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
) && ra_lmap
[i
] >= 0)
461 /* load global register allocations from memory */
462 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
463 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
)) {
464 ra_lmap
[i
] = reg_rmap(ic_i
, i
);
465 loc_toreg(ra_lmap
[i
], 0, i
, ULNG
);
470 static void ra_init(struct ic
*ic
, long ic_n
)
472 long md
, m1
, m2
, m3
, mt
;
474 ic_bbeg
= calloc(ic_n
, sizeof(ic_bbeg
[0]));
475 ra_gmask
= calloc(ic_n
, sizeof(ra_gmask
[0]));
476 loc_mem
= calloc(loc_n
, sizeof(loc_mem
[0]));
478 for (i
= 0; i
< ic_n
; i
++) {
479 if (i
+ 1 < ic_n
&& ic
[i
].op
& (O_JXX
| O_RET
))
481 if (ic
[i
].op
& O_JXX
&& ic
[i
].a3
< ic_n
)
482 ic_bbeg
[ic
[i
].a3
] = 1;
485 for (i
= 0; i
< ic_n
; i
++) {
486 int n
= ic_regcnt(ic
+ i
);
488 i_reg(op
, &md
, &m1
, &m2
, &m3
, &mt
);
490 ra_gmask
[ic
[i
].a1
] = m1
;
492 ra_gmask
[ic
[i
].a2
] = m2
;
494 ra_gmask
[ic
[i
].a3
] = m3
;
496 for (j
= 0; j
< MIN(N_ARGS
, ic
[i
].a3
); j
++)
497 ra_gmask
[ic
[i
].args
[j
]] = 1 << argregs
[j
];
500 for (i
= 0; i
< LEN(ra_vmap
); i
++)
503 for (i
= 0; i
< LEN(ra_lmap
); i
++)
504 ra_lmap
[i
] = reg_rmap(0, i
);
505 func_regs
|= reg_mask();
507 for (i
= 0; i
< LEN(ra_live
); i
++)
513 static void ra_done(void)
520 static void ic_gencode(struct ic
*ic
, long ic_n
)
525 /* loading arguments in their allocated registers */
526 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
527 int loc
= ra_lmap
[i
];
528 if (loc
>= 0 && loc
< func_argc
)
529 if (loc
>= N_ARGS
|| i
!= argregs
[loc
])
530 loc_toreg(loc
, 0, i
, ULNG
);
532 /* generating code */
533 for (i
= 0; i
< ic_n
; i
++) {
536 int n
= ic_regcnt(ic
+ i
);
539 ra_map(&rd
, &r1
, &r2
, &r3
, &mt
);
542 int aregs
= MIN(N_ARGS
, argc
);
543 /* arguments passed via stack */
544 for (j
= argc
- 1; j
>= aregs
; --j
) {
545 int v
= ic
[i
].args
[j
];
546 int rx
= ra_vreg(v
) >= 0 ? ra_vreg(v
) : rd
;
548 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0,
549 rx
, REG_SP
, (j
- aregs
) * ULNG
);
552 func_maxargs
= MAX(func_maxargs
, argc
- aregs
);
553 /* arguments passed via registers */
554 for (j
= aregs
- 1; j
>= 0; --j
)
555 ra_vload(ic
[i
].args
[j
], argregs
[j
]);
557 /* loading the operands */
559 ra_vload(ic
[i
].a1
, r1
);
561 ra_vload(ic
[i
].a2
, r2
);
563 ra_vload(ic
[i
].a3
, r3
);
564 /* dropping values that are no longer used */
565 for (j
= 0; j
< LEN(ra_live
); j
++)
566 if (ra_live
[j
] >= 0 && ic_luse
[ra_live
[j
]] <= i
)
567 ra_vdrop(ra_live
[j
]);
568 /* saving values stored in registers that may change */
569 for (j
= 0; j
< N_REGS
; j
++)
572 /* overwriting a value that is needed later (unless loading a local to its register) */
574 if (oc
!= (O_LD
| O_LOC
) || ra_lmap
[rd
] != ic
[i
].a1
||
577 /* before the last instruction of a basic block; for jumps */
578 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && oc
& O_JXX
)
580 /* performing the instruction */
582 i_ins(op
, rd
, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, 0);
584 i_ins(op
, rd
, r1
, r2
, 0);
585 if (oc
== (O_LD
| O_NUM
))
586 i_ins(op
, rd
, r1
, ic
[i
].a2
, 0);
587 if (oc
== (O_LD
| O_LOC
))
588 ra_lload(ic
[i
].a1
, ic
[i
].a2
, rd
, O_T(op
));
589 if (oc
== (O_ST
| O_NUM
))
590 i_ins(op
, 0, r1
, r2
, ic
[i
].a3
);
591 if (oc
== (O_ST
| O_LOC
))
592 ra_lsave(ic
[i
].a2
, ic
[i
].a3
, r1
, O_T(op
));
594 i_ins(op
, 0, r1
, 0, 0);
596 i_ins(op
, rd
, r1
, 0, 0);
597 if (oc
== (O_MOV
| O_NUM
))
598 i_ins(op
, rd
, ic
[i
].a1
, 0, 0);
599 if (oc
== (O_MOV
| O_LOC
))
600 loc_toadd(ic
[i
].a1
, ic
[i
].a2
, rd
);
601 if (oc
== (O_MOV
| O_SYM
))
602 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
604 i_ins(op
, rd
, r1
, 0, 0);
605 if (oc
== (O_CALL
| O_SYM
))
606 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
608 i_ins(op
, 0, 0, 0, ic
[i
].a3
);
610 i_ins(op
, 0, r1
, 0, ic
[i
].a3
);
612 i_ins(op
, 0, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, ic
[i
].a3
);
614 i_ins(op
, 0, r1
, r2
, r3
);
616 i_ins(op
, 0, r1
, r2
, r3
);
617 /* saving back the output register */
618 if (oc
& O_OUT
&& ic_luse
[i
] > i
)
620 /* after the last instruction of a basic block */
621 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && !(oc
& O_JXX
))
627 static void ic_reset(void)
638 void o_func_beg(char *name
, int argc
, int global
, int varg
)
645 for (i
= 0; i
< argc
; i
++)
646 loc_add(I_ARG0
+ -i
* ULNG
);
647 out_def(name
, (global
? OUT_GLOB
: 0) | OUT_CS
, mem_len(&cs
), 0);
650 void o_code(char *name
, char *c
, long c_len
)
652 out_def(name
, OUT_CS
, mem_len(&cs
), 0);
653 mem_put(&cs
, c
, c_len
);
656 void o_func_end(void)
660 long sargs_last
= -1;
663 long c_len
, *rsym
, *rflg
, *roff
, rcnt
;
665 int locs
= 0; /* accessing locals on the stack */
667 ic_get(&ic
, &ic_n
); /* the intermediate code */
668 reg_init(ic
, ic_n
); /* global register allocation */
669 ra_init(ic
, ic_n
); /* initialize register allocation */
670 ic_luse
= ic_lastuse(ic
, ic_n
);
671 ic_gencode(ic
, ic_n
); /* generating machine code */
673 /* deciding which arguments to save */
674 for (i
= 0; i
< func_argc
; i
++)
677 for (i
= 0; i
< N_ARGS
&& (func_varg
|| i
< sargs_last
); i
++)
678 sargs
|= 1 << argregs
[i
];
679 /* computing the amount of stack subtraction */
680 for (i
= 0; i
< loc_n
; i
++)
683 spsub
= (locs
|| ra_vmax
) ? loc_pos
+ ra_vmax
* ULNG
: 0;
684 for (i
= 0; i
< N_TMPS
; i
++)
685 if (((1 << tmpregs
[i
]) & func_regs
& R_PERM
) != 0)
688 spsub
+= func_maxargs
* ULNG
;
690 for (i
= 0; i
< ic_n
; i
++)
691 if (ic
[i
].op
& O_CALL
)
693 /* adding function prologue and epilogue */
694 i_wrap(func_argc
, sargs
, spsub
, spsub
|| locs
|| !leaf
,
695 func_regs
& R_PERM
, -sregs_pos
);
697 i_code(&c
, &c_len
, &rsym
, &rflg
, &roff
, &rcnt
);
698 for (i
= 0; i
< rcnt
; i
++) /* adding the relocations */
699 out_rel(rsym
[i
], rflg
[i
], roff
[i
] + mem_len(&cs
));
700 mem_put(&cs
, c
, c_len
); /* appending function code */
705 for (i
= 0; i
< ic_n
; i
++)
715 out_write(fd
, mem_buf(&cs
), mem_len(&cs
), mem_buf(&ds
), mem_len(&ds
));