1 /* neatcc code generation */
7 static struct mem ds
; /* data segment */
8 static struct mem cs
; /* code segment */
9 static long bsslen
; /* bss segment size */
10 static struct ic
*ic
; /* current instruction stream */
11 static long ic_n
; /* number of instructions in ic[] */
12 static long ic_i
; /* current instruction */
13 static long *ic_luse
; /* last instruction in which values are used */
15 static long *loc_off
; /* offset of locals on the stack */
16 static long loc_n
, loc_sz
; /* number of locals */
17 static long loc_pos
; /* current stack position */
18 static int *loc_mem
; /* local was accessed on the stack */
20 static char (*ds_name
)[NAMELEN
];/* data section symbols */
21 static long *ds_off
; /* data section offsets */
22 static long ds_n
, ds_sz
; /* number of data section symbols */
24 static int func_argc
; /* number of arguments */
25 static int func_varg
; /* varargs */
26 static int func_regs
; /* used registers */
27 static int func_maxargs
; /* the maximum number of arguments on the stack */
28 static long *ic_bbeg
; /* whether each instruction begins a basic block */
30 static long ra_vmap
[N_REGS
]; /* register to intermediate value assignments */
31 static long ra_lmap
[N_REGS
]; /* register to local assignments */
32 static long *ra_gmask
; /* the mask of good registers for each value */
33 static long ra_live
[NTMPS
]; /* live values */
34 static int ra_vmax
; /* the number of values stored on the stack */
36 static long loc_add(long pos
)
38 if (loc_n
>= loc_sz
) {
39 loc_sz
= MAX(128, loc_sz
* 2);
40 loc_off
= mextend(loc_off
, loc_n
, loc_sz
, sizeof(loc_off
[0]));
46 long o_mklocal(long sz
)
48 loc_pos
+= ALIGN(sz
, ULNG
);
49 return loc_add(loc_pos
);
52 void o_rmlocal(long addr
, long sz
)
61 void o_bsnew(char *name
, long size
, int global
)
63 out_def(name
, OUT_BSS
| (global
? OUT_GLOB
: 0), bsslen
, size
);
64 bsslen
+= ALIGN(size
, OUT_ALIGNMENT
);
67 long o_dsnew(char *name
, long size
, int global
)
71 ds_sz
= MAX(128, ds_sz
* 2);
72 ds_name
= mextend(ds_name
, ds_n
, ds_sz
, sizeof(ds_name
[0]));
73 ds_off
= mextend(ds_off
, ds_n
, ds_sz
, sizeof(ds_off
[0]));
76 strcpy(ds_name
[idx
], name
);
77 ds_off
[idx
] = mem_len(&ds
);
78 out_def(name
, OUT_DS
| (global
? OUT_GLOB
: 0), mem_len(&ds
), size
);
79 mem_putz(&ds
, ALIGN(size
, OUT_ALIGNMENT
));
83 void o_dscpy(long addr
, void *buf
, long len
)
85 mem_cpy(&ds
, addr
, buf
, len
);
88 static int dat_off(char *name
)
91 for (i
= 0; i
< ds_n
; i
++)
92 if (!strcmp(name
, ds_name
[i
]))
97 void o_dsset(char *name
, long off
, long bt
)
99 long sym_off
= dat_off(name
) + off
;
100 long num
, roff
, rsym
;
101 if (!o_popnum(&num
)) {
102 mem_cpy(&ds
, sym_off
, &num
, T_SZ(bt
));
105 if (!o_popsym(&rsym
, &roff
)) {
106 out_rel(rsym
, OUT_DS
, sym_off
);
107 mem_cpy(&ds
, sym_off
, &roff
, T_SZ(bt
));
111 static int ra_vreg(int val
)
114 for (i
= 0; i
< LEN(ra_vmap
); i
++)
115 if (ra_vmap
[i
] == val
)
120 static int ra_lreg(int loc
)
123 for (i
= 0; i
< LEN(ra_lmap
); i
++)
124 if (ra_lmap
[i
] == loc
)
129 /* mask of registers assigned to locals */
130 static long ra_lmask(void)
134 for (i
= 0; i
< LEN(ra_lmap
); i
++)
140 /* mask of registers assigned to values */
141 static long ra_vmask(void)
145 for (i
= 0; i
< LEN(ra_vmap
); i
++)
151 /* find a temporary register specified in the given mask */
152 static long ra_regscn(long mask
)
155 for (i
= 0; i
< N_TMPS
; i
++)
156 if ((1 << tmpregs
[i
]) & mask
)
161 /* find a register, with the given good, acceptable, and bad register masks */
162 static long ra_regget(long iv
, long gmask
, long amask
, long bmask
)
165 gmask
&= ~bmask
& amask
;
167 if (ra_vreg(iv
) >= 0 && (1 << ra_vreg(iv
)) & (gmask
| amask
))
171 if (ra_regscn(gmask
& ~vmask
& ~lmask
) >= 0)
172 return ra_regscn(gmask
& ~vmask
& ~lmask
);
173 if (ra_regscn(amask
& ~vmask
& ~lmask
) >= 0)
174 return ra_regscn(amask
& ~vmask
& ~lmask
);
175 if (ra_regscn(gmask
) >= 0)
176 return ra_regscn(gmask
);
177 if (ra_regscn(amask
) >= 0)
178 return ra_regscn(amask
);
179 die("neatcc: cannot allocate an acceptable register\n");
183 /* find a free and cheap register */
184 static long ra_regcheap(long mask
)
186 return ra_regscn(mask
& (func_regs
| (R_TMPS
& ~R_PERM
)) &
187 ~ra_lmask() & ~ra_vmask());
190 /* allocate registers for the current instruction */
191 static void ra_map(int *rd
, int *r1
, int *r2
, int *r3
, long *mt
)
194 struct ic
*c
= &ic
[ic_i
];
196 int n
= ic_regcnt(c
);
204 /* optimizing loading locals: point to local's register */
205 if (oc
== (O_LD
| O_LOC
) && ra_lreg(c
->a1
) >= 0 &&
206 ra_vmap
[ra_lreg(c
->a1
)] < 0) {
207 *rd
= ra_lreg(c
->a1
);
208 func_regs
|= 1 << *rd
;
211 /* do not use argument registers to hold call destination */
213 for (i
= 0; i
< MIN(c
->a3
, N_ARGS
); i
++)
214 all
|= (1 << argregs
[i
]);
215 /* instructions on locals can be simplified */
219 if (oc
& (O_ST
| O_LD
))
220 oc
= (oc
& ~O_LOC
) & O_NUM
;
222 if (i_reg(c
->op
, &md
, &m1
, &m2
, &m3
, mt
))
223 die("neatcc: instruction %08lx not supported\n", c
->op
);
225 * the registers used in global register allocation should not
226 * be used in the last instruction of a basic block.
228 if (c
->op
& (O_JZ
| O_JCC
))
229 for (i
= 0; i
< LEN(ra_lmap
); i
++)
230 if (reg_rmap(ic_i
, i
) >= 0 && ra_lmap
[i
] != reg_rmap(ic_i
, i
))
232 /* allocating registers for the operands */
234 *r2
= ra_regget(c
->a2
, m2
, m2
, all
);
238 *r1
= ra_regget(c
->a1
, m1
, m1
, all
);
242 *r3
= ra_regget(c
->a3
, m3
, m3
, all
);
246 long m4
= (md
? md
: m1
) & ~all
;
247 long a4
= md
? ic_i
: c
->a1
;
250 else if (n
>= 2 && md
& (1 << *r2
) && ic_luse
[*r2
] <= ic_i
)
252 else if (n
>= 1 && md
& (1 << *r1
) && ic_luse
[*r1
] <= ic_i
)
255 *rd
= ra_regget(ic_i
, ra_gmask
[ic_i
], md
, 0);
256 /* if overwriting a local, use another register */
257 if (ra_lmap
[*rd
] >= 0)
258 if (m4
& ~(1 << *rd
))
259 *rd
= ra_regget(a4
, ra_gmask
[ic_i
], m4
& ~(1 << *rd
), 0);
264 func_regs
|= all
| *mt
;
267 static long iv_rank(long iv
)
270 for (i
= 0; i
< LEN(ra_live
); i
++)
271 if (ra_live
[i
] == iv
)
273 die("neatcc: the specified value is not live\n");
277 static long iv_addr(long rank
)
279 return loc_pos
+ rank
* ULNG
+ ULNG
;
282 static void loc_toreg(long loc
, long off
, int reg
, int bt
)
285 i_ins(O_MK(O_LD
| O_NUM
, bt
), reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
288 static void loc_tomem(long loc
, long off
, int reg
, int bt
)
291 i_ins(O_MK(O_ST
| O_NUM
, bt
), 0, reg
, REG_FP
, -loc_off
[loc
] + off
);
294 static void loc_toadd(long loc
, long off
, int reg
)
297 i_ins(O_ADD
| O_NUM
, reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
300 /* return nonzero if the local is read at least once in this basic block */
301 static int loc_isread(long loc
)
304 for (i
= ic_i
+ 1; i
< ic_n
&& !ic_bbeg
[i
]; i
++)
305 if (ic
[i
].op
& O_LOC
)
306 return ic
[i
].op
& O_LD
;
310 static void val_toreg(long val
, int reg
)
312 i_ins(O_MK(O_LD
| O_NUM
, ULNG
), reg
, REG_FP
, -iv_addr(iv_rank(val
)), 0);
315 static void val_tomem(long val
, int reg
)
317 long rank
= iv_rank(val
);
318 ra_vmax
= MAX(ra_vmax
, rank
+ 1);
319 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0, reg
, REG_FP
, -iv_addr(rank
));
322 /* move the value to the stack */
323 static void ra_spill(int reg
)
325 if (ra_vmap
[reg
] >= 0) {
326 val_tomem(ra_vmap
[reg
], reg
);
329 if (ra_lmap
[reg
] >= 0) {
330 if (ra_lmap
[reg
] == reg_rmap(ic_i
, reg
))
331 loc_tomem(ra_lmap
[reg
], 0, reg
, ULNG
);
336 /* set the value to the given register */
337 static void ra_vsave(long iv
, int reg
)
341 for (i
= 0; i
< LEN(ra_live
); i
++)
344 if (i
== LEN(ra_live
))
345 die("neatcc: too many live values\n");
349 /* load the value into a register */
350 static void ra_vload(long iv
, int reg
)
352 if (ra_vmap
[reg
] == iv
)
354 if (ra_vmap
[reg
] >= 0 || ra_lmap
[reg
] >= 0)
356 if (ra_vreg(iv
) >= 0) {
357 i_ins(O_MK(O_MOV
, ULNG
), reg
, ra_vreg(iv
), 0, 0);
358 ra_vmap
[ra_vreg(iv
)] = -1;
365 /* the value is no longer needed */
366 static void ra_vdrop(long iv
)
369 for (i
= 0; i
< LEN(ra_live
); i
++)
370 if (ra_live
[i
] == iv
)
372 if (ra_vreg(iv
) >= 0)
373 ra_vmap
[ra_vreg(iv
)] = -1;
376 /* move the given value to memory or a free register */
377 static void ra_vmove(long iv
, long mask
)
379 int src
= ra_vreg(iv
);
380 int dst
= ra_regcheap(mask
);
381 if (dst
>= 0 && ra_vmap
[dst
] < 0 && ra_lmap
[dst
] < 0) {
382 i_ins(O_MK(O_MOV
, ULNG
), dst
, src
, 0, 0);
390 /* load the value of local loc into register reg */
391 static void ra_lload(long loc
, long off
, int reg
, int bt
)
393 int greg
= reg_lmap(ic_i
, loc
);
395 int lreg
= ra_lreg(loc
);
396 /* values using the same register */
397 if (ra_vmap
[greg
] >= 0)
398 ra_vmove(ra_vmap
[greg
],
399 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
403 i_ins(O_MK(O_MOV
, bt
), greg
, lreg
, 0, 0);
405 loc_toreg(loc
, off
, greg
, bt
);
409 if (ra_lreg(loc
) < 0 && reg_safe(loc
) && loc_isread(loc
)) {
411 loc_toreg(loc
, off
, reg
, bt
);
413 if (ra_lreg(loc
) >= 0) {
414 if (ra_lreg(loc
) != reg
)
415 i_ins(O_MK(O_MOV
, bt
), reg
, ra_lreg(loc
), 0, 0);
417 loc_toreg(loc
, off
, reg
, bt
);
421 /* register reg contains the value of local loc */
422 static void ra_lsave(long loc
, long off
, int reg
, int bt
)
424 int lreg
= ra_lreg(loc
);
425 int greg
= reg_lmap(ic_i
, loc
);
427 if (lreg
>= 0 && lreg
!= greg
)
429 if (ra_vmap
[greg
] >= 0) /* values using the same register */
430 ra_vmove(ra_vmap
[greg
],
431 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
432 i_ins(O_MK(O_MOV
, bt
), greg
, reg
, 0, 0);
437 loc_tomem(loc
, off
, reg
, bt
);
438 if (ra_lmap
[reg
] < 0 && reg_safe(loc
) && loc_isread(loc
))
443 /* end of a basic block */
444 static void ra_bbend(void)
447 /* save values to memory */
448 for (i
= 0; i
< LEN(ra_vmap
); i
++)
451 /* dropping local caches */
452 for (i
= 0; i
< LEN(ra_lmap
); i
++)
453 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
) && ra_lmap
[i
] >= 0)
455 /* load global register allocations from memory */
456 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
457 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
)) {
458 ra_lmap
[i
] = reg_rmap(ic_i
, i
);
459 loc_toreg(ra_lmap
[i
], 0, i
, ULNG
);
464 static void ra_init(struct ic
*ic
, long ic_n
)
466 long md
, m1
, m2
, m3
, mt
;
468 ic_bbeg
= calloc(ic_n
, sizeof(ic_bbeg
[0]));
469 ra_gmask
= calloc(ic_n
, sizeof(ra_gmask
[0]));
470 loc_mem
= calloc(loc_n
, sizeof(loc_mem
[0]));
472 for (i
= 0; i
< ic_n
; i
++) {
473 if (i
+ 1 < ic_n
&& ic
[i
].op
& (O_JXX
| O_RET
))
475 if (ic
[i
].op
& O_JXX
&& ic
[i
].a3
< ic_n
)
476 ic_bbeg
[ic
[i
].a3
] = 1;
479 for (i
= 0; i
< ic_n
; i
++) {
480 int n
= ic_regcnt(ic
+ i
);
482 i_reg(op
, &md
, &m1
, &m2
, &m3
, &mt
);
484 ra_gmask
[ic
[i
].a1
] = m1
;
486 ra_gmask
[ic
[i
].a2
] = m2
;
488 ra_gmask
[ic
[i
].a3
] = m3
;
489 if (O_C(op
) == (O_ST
| O_LOC
) && reg_lmap(i
, ic
[i
].a2
))
490 ra_gmask
[ic
[i
].a1
] = 1 << reg_lmap(i
, ic
[i
].a2
);
492 for (j
= 0; j
< MIN(N_ARGS
, ic
[i
].a3
); j
++)
493 ra_gmask
[ic
[i
].args
[j
]] = 1 << argregs
[j
];
496 for (i
= 0; i
< LEN(ra_vmap
); i
++)
499 for (i
= 0; i
< LEN(ra_lmap
); i
++)
500 ra_lmap
[i
] = reg_rmap(0, i
);
501 func_regs
|= reg_mask();
503 for (i
= 0; i
< LEN(ra_live
); i
++)
509 static void ra_done(void)
516 static void ic_gencode(struct ic
*ic
, long ic_n
)
521 /* loading arguments in their allocated registers */
522 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
523 int loc
= ra_lmap
[i
];
524 if (loc
>= 0 && loc
< func_argc
)
525 if (loc
>= N_ARGS
|| i
!= argregs
[loc
])
526 loc_toreg(loc
, 0, i
, ULNG
);
528 /* generating code */
529 for (i
= 0; i
< ic_n
; i
++) {
532 int n
= ic_regcnt(ic
+ i
);
535 ra_map(&rd
, &r1
, &r2
, &r3
, &mt
);
538 int aregs
= MIN(N_ARGS
, argc
);
539 /* arguments passed via stack */
540 for (j
= argc
- 1; j
>= aregs
; --j
) {
541 int v
= ic
[i
].args
[j
];
542 int rx
= ra_vreg(v
) >= 0 ? ra_vreg(v
) : rd
;
544 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0,
545 rx
, REG_SP
, (j
- aregs
) * ULNG
);
548 func_maxargs
= MAX(func_maxargs
, argc
- aregs
);
549 /* arguments passed via registers */
550 for (j
= aregs
- 1; j
>= 0; --j
)
551 ra_vload(ic
[i
].args
[j
], argregs
[j
]);
553 /* loading the operands */
555 ra_vload(ic
[i
].a1
, r1
);
557 ra_vload(ic
[i
].a2
, r2
);
559 ra_vload(ic
[i
].a3
, r3
);
560 /* dropping values that are no longer used */
561 for (j
= 0; j
< LEN(ra_live
); j
++)
562 if (ra_live
[j
] >= 0 && ic_luse
[ra_live
[j
]] <= i
)
563 ra_vdrop(ra_live
[j
]);
564 /* saving values stored in registers that may change */
565 for (j
= 0; j
< N_REGS
; j
++)
568 /* overwriting a value that is needed later (unless loading a local to its register) */
570 if (oc
!= (O_LD
| O_LOC
) || ra_lmap
[rd
] != ic
[i
].a1
||
573 /* before the last instruction of a basic block; for jumps */
574 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && oc
& O_JXX
)
576 /* performing the instruction */
578 i_ins(op
, rd
, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, 0);
580 i_ins(op
, rd
, r1
, r2
, 0);
581 if (oc
== (O_LD
| O_NUM
))
582 i_ins(op
, rd
, r1
, ic
[i
].a2
, 0);
583 if (oc
== (O_LD
| O_LOC
))
584 ra_lload(ic
[i
].a1
, ic
[i
].a2
, rd
, O_T(op
));
585 if (oc
== (O_ST
| O_NUM
))
586 i_ins(op
, 0, r1
, r2
, ic
[i
].a3
);
587 if (oc
== (O_ST
| O_LOC
))
588 ra_lsave(ic
[i
].a2
, ic
[i
].a3
, r1
, O_T(op
));
590 i_ins(op
, 0, r1
, 0, 0);
592 i_ins(op
, rd
, r1
, 0, 0);
593 if (oc
== (O_MOV
| O_NUM
))
594 i_ins(op
, rd
, ic
[i
].a1
, 0, 0);
595 if (oc
== (O_MOV
| O_LOC
))
596 loc_toadd(ic
[i
].a1
, ic
[i
].a2
, rd
);
597 if (oc
== (O_MOV
| O_SYM
))
598 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
600 i_ins(op
, rd
, r1
, 0, 0);
601 if (oc
== (O_CALL
| O_SYM
))
602 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
604 i_ins(op
, 0, 0, 0, ic
[i
].a3
);
606 i_ins(op
, 0, r1
, 0, ic
[i
].a3
);
608 i_ins(op
, 0, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, ic
[i
].a3
);
610 i_ins(op
, 0, r1
, r2
, r3
);
612 i_ins(op
, 0, r1
, r2
, r3
);
613 /* saving back the output register */
614 if (oc
& O_OUT
&& ic_luse
[i
] > i
)
616 /* after the last instruction of a basic block */
617 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && !(oc
& O_JXX
))
623 static void ic_reset(void)
634 void o_func_beg(char *name
, int argc
, int global
, int varg
)
641 for (i
= 0; i
< argc
; i
++)
642 loc_add(I_ARG0
+ -i
* ULNG
);
643 out_def(name
, (global
? OUT_GLOB
: 0) | OUT_CS
, mem_len(&cs
), 0);
646 void o_code(char *name
, char *c
, long c_len
)
648 out_def(name
, OUT_CS
, mem_len(&cs
), 0);
649 mem_put(&cs
, c
, c_len
);
652 void o_func_end(void)
656 long sargs_last
= -1;
659 long c_len
, *rsym
, *rflg
, *roff
, rcnt
;
661 int locs
= 0; /* accessing locals on the stack */
663 ic_get(&ic
, &ic_n
); /* the intermediate code */
664 reg_init(ic
, ic_n
); /* global register allocation */
665 ra_init(ic
, ic_n
); /* initialize register allocation */
666 ic_luse
= ic_lastuse(ic
, ic_n
);
667 ic_gencode(ic
, ic_n
); /* generating machine code */
669 /* deciding which arguments to save */
670 for (i
= 0; i
< func_argc
; i
++)
673 for (i
= 0; i
< N_ARGS
&& (func_varg
|| i
< sargs_last
); i
++)
674 sargs
|= 1 << argregs
[i
];
675 /* computing the amount of stack subtraction */
676 for (i
= 0; i
< loc_n
; i
++)
679 spsub
= (locs
|| ra_vmax
) ? loc_pos
+ ra_vmax
* ULNG
: 0;
680 for (i
= 0; i
< N_TMPS
; i
++)
681 if (((1 << tmpregs
[i
]) & func_regs
& R_PERM
) != 0)
684 spsub
+= func_maxargs
* ULNG
;
686 for (i
= 0; i
< ic_n
; i
++)
687 if (ic
[i
].op
& O_CALL
)
689 /* adding function prologue and epilogue */
690 i_wrap(func_argc
, sargs
, spsub
, spsub
|| locs
|| !leaf
,
691 func_regs
& R_PERM
, -sregs_pos
);
693 i_code(&c
, &c_len
, &rsym
, &rflg
, &roff
, &rcnt
);
694 for (i
= 0; i
< rcnt
; i
++) /* adding the relocations */
695 out_rel(rsym
[i
], rflg
[i
], roff
[i
] + mem_len(&cs
));
696 mem_put(&cs
, c
, c_len
); /* appending function code */
701 for (i
= 0; i
< ic_n
; i
++)
711 out_write(fd
, mem_buf(&cs
), mem_len(&cs
), mem_buf(&ds
), mem_len(&ds
));