1 /* neatcc code generation */
7 static struct mem ds
; /* data segment */
8 static struct mem cs
; /* code segment */
9 static long bsslen
; /* bss segment size */
10 static struct ic
*ic
; /* current instruction stream */
11 static long ic_n
; /* number of instructions in ic[] */
12 static long ic_i
; /* current instruction */
13 static long *ic_luse
; /* last instruction in which values are used */
15 static long *loc_off
; /* offset of locals on the stack */
16 static long loc_n
, loc_sz
; /* number of locals */
17 static long loc_pos
; /* current stack position */
18 static int *loc_mem
; /* local was accessed on the stack */
20 static char (*ds_name
)[NAMELEN
];/* data section symbols */
21 static long *ds_off
; /* data section offsets */
22 static long ds_n
, ds_sz
; /* number of data section symbols */
24 static int func_argc
; /* number of arguments */
25 static int func_varg
; /* varargs */
26 static int func_regs
; /* used registers */
27 static int func_maxargs
; /* the maximum number of arguments on the stack */
28 static long *ic_bbeg
; /* whether each instruction begins a basic block */
30 static long ra_vmap
[N_REGS
]; /* register to intermediate value assignments */
31 static long ra_lmap
[N_REGS
]; /* register to local assignments */
32 static long *ra_gmask
; /* the mask of good registers for each value */
33 static long ra_live
[NTMPS
]; /* live values */
34 static int ra_vmax
; /* the number of values stored on the stack */
36 static long loc_add(long pos
)
38 if (loc_n
>= loc_sz
) {
39 loc_sz
= MAX(128, loc_sz
* 2);
40 loc_off
= mextend(loc_off
, loc_n
, loc_sz
, sizeof(loc_off
[0]));
46 long o_mklocal(long sz
)
48 loc_pos
+= ALIGN(sz
, ULNG
);
49 return loc_add(loc_pos
);
52 void o_rmlocal(long addr
, long sz
)
61 void o_bsnew(char *name
, long size
, int global
)
63 out_def(name
, OUT_BSS
| (global
? OUT_GLOB
: 0), bsslen
, size
);
64 bsslen
+= ALIGN(size
, OUT_ALIGNMENT
);
67 long o_dsnew(char *name
, long size
, int global
)
71 ds_sz
= MAX(128, ds_sz
* 2);
72 ds_name
= mextend(ds_name
, ds_n
, ds_sz
, sizeof(ds_name
[0]));
73 ds_off
= mextend(ds_off
, ds_n
, ds_sz
, sizeof(ds_off
[0]));
76 strcpy(ds_name
[idx
], name
);
77 ds_off
[idx
] = mem_len(&ds
);
78 out_def(name
, OUT_DS
| (global
? OUT_GLOB
: 0), mem_len(&ds
), size
);
79 mem_putz(&ds
, ALIGN(size
, OUT_ALIGNMENT
));
83 void o_dscpy(long addr
, void *buf
, long len
)
85 mem_cpy(&ds
, addr
, buf
, len
);
88 static int dat_off(char *name
)
91 for (i
= 0; i
< ds_n
; i
++)
92 if (!strcmp(name
, ds_name
[i
]))
97 void o_dsset(char *name
, long off
, long bt
)
99 long sym_off
= dat_off(name
) + off
;
100 long num
, roff
, rsym
;
101 if (!o_popnum(&num
)) {
102 mem_cpy(&ds
, sym_off
, &num
, T_SZ(bt
));
105 if (!o_popsym(&rsym
, &roff
)) {
106 out_rel(rsym
, OUT_DS
, sym_off
);
107 mem_cpy(&ds
, sym_off
, &roff
, T_SZ(bt
));
111 static int ra_vreg(int val
)
114 for (i
= 0; i
< LEN(ra_vmap
); i
++)
115 if (ra_vmap
[i
] == val
)
120 static int ra_lreg(int loc
)
123 for (i
= 0; i
< LEN(ra_lmap
); i
++)
124 if (ra_lmap
[i
] == loc
)
129 /* mask of registers assigned to locals */
130 static long ra_lmask(void)
134 for (i
= 0; i
< LEN(ra_lmap
); i
++)
140 /* mask of registers assigned to values */
141 static long ra_vmask(void)
145 for (i
= 0; i
< LEN(ra_vmap
); i
++)
151 /* find a temporary register specified in the given mask */
152 static long ra_regscn(long mask
)
155 for (i
= 0; i
< N_TMPS
; i
++)
156 if ((1 << tmpregs
[i
]) & mask
)
161 /* find a register, with the given good, acceptable, and bad register masks */
162 static long ra_regget(long iv
, long gmask
, long amask
, long bmask
)
165 gmask
&= ~bmask
& amask
;
167 if (ra_vreg(iv
) >= 0 && (1 << ra_vreg(iv
)) & (gmask
| amask
))
171 if (ra_regscn(gmask
& ~vmask
& ~lmask
) >= 0)
172 return ra_regscn(gmask
& ~vmask
& ~lmask
);
173 if (ra_regscn(amask
& ~vmask
& ~lmask
) >= 0)
174 return ra_regscn(amask
& ~vmask
& ~lmask
);
175 if (ra_regscn(gmask
) >= 0)
176 return ra_regscn(gmask
);
177 if (ra_regscn(amask
) >= 0)
178 return ra_regscn(amask
);
179 die("neatcc: cannot allocate an acceptable register\n");
183 /* find a free and cheap register */
184 static long ra_regcheap(long mask
)
186 return ra_regscn(mask
& (func_regs
| (R_TMPS
& ~R_PERM
)) &
187 ~ra_lmask() & ~ra_vmask());
190 /* allocate registers for the current instruction */
191 static void ra_map(int *rd
, int *r1
, int *r2
, int *r3
, long *mt
)
194 struct ic
*c
= &ic
[ic_i
];
196 int n
= ic_regcnt(c
);
204 /* optimizing loading locals: point to local's register */
205 if (oc
== (O_LD
| O_LOC
) && ra_lreg(c
->a1
) >= 0 &&
206 ra_vmap
[ra_lreg(c
->a1
)] < 0) {
207 *rd
= ra_lreg(c
->a1
);
208 func_regs
|= 1 << *rd
;
211 /* do not use argument registers to hold call destination */
213 for (i
= 0; i
< MIN(c
->a3
, N_ARGS
); i
++)
214 all
|= (1 << argregs
[i
]);
215 /* instructions on locals can be simplified */
219 if (oc
& (O_ST
| O_LD
))
220 oc
= (oc
& ~O_LOC
) & O_NUM
;
222 if (i_reg(c
->op
, &md
, &m1
, &m2
, &m3
, mt
))
223 die("neatcc: instruction %08lx not supported\n", c
->op
);
225 * the registers used in global register allocation should not
226 * be used in the last instruction of a basic block.
228 if (c
->op
& (O_JZ
| O_JCC
))
229 for (i
= 0; i
< LEN(ra_lmap
); i
++)
230 if (reg_rmap(ic_i
, i
) >= 0 && ra_lmap
[i
] != reg_rmap(ic_i
, i
))
232 /* allocating registers for the operands */
234 *r2
= ra_regget(c
->a2
, m2
, m2
, all
);
238 *r1
= ra_regget(c
->a1
, m1
, m1
, all
);
242 *r3
= ra_regget(c
->a3
, m3
, m3
, all
);
246 long m4
= (md
? md
: m1
) & ~all
;
247 long a4
= md
? ic_i
: c
->a1
;
250 else if (ra_gmask
[ic_i
] & md
& ~all
)
251 *rd
= ra_regget(ic_i
, ra_gmask
[ic_i
], md
, 0);
252 else if (n
>= 2 && md
& (1 << *r2
) && ic_luse
[*r2
] <= ic_i
)
254 else if (n
>= 1 && md
& (1 << *r1
) && ic_luse
[*r1
] <= ic_i
)
257 *rd
= ra_regget(ic_i
, ra_gmask
[ic_i
], md
, 0);
258 /* if overwriting a local, use another register */
259 if (ra_lmap
[*rd
] >= 0)
260 if (m4
& ~(1 << *rd
))
261 *rd
= ra_regget(a4
, ra_gmask
[ic_i
], m4
& ~(1 << *rd
), 0);
266 func_regs
|= all
| *mt
;
269 static long iv_rank(long iv
)
272 for (i
= 0; i
< LEN(ra_live
); i
++)
273 if (ra_live
[i
] == iv
)
275 die("neatcc: the specified value is not live\n");
279 static long iv_addr(long rank
)
281 return loc_pos
+ rank
* ULNG
+ ULNG
;
284 static void loc_toreg(long loc
, long off
, int reg
, int bt
)
287 i_ins(O_MK(O_LD
| O_NUM
, bt
), reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
290 static void loc_tomem(long loc
, long off
, int reg
, int bt
)
293 i_ins(O_MK(O_ST
| O_NUM
, bt
), 0, reg
, REG_FP
, -loc_off
[loc
] + off
);
296 static void loc_toadd(long loc
, long off
, int reg
)
299 i_ins(O_ADD
| O_NUM
, reg
, REG_FP
, -loc_off
[loc
] + off
, 0);
302 /* return nonzero if the local is read at least once in this basic block */
303 static int loc_isread(long loc
)
306 for (i
= ic_i
+ 1; i
< ic_n
&& !ic_bbeg
[i
]; i
++)
307 if (ic
[i
].op
& O_LOC
)
308 return ic
[i
].op
& O_LD
;
312 static void val_toreg(long val
, int reg
)
314 i_ins(O_MK(O_LD
| O_NUM
, ULNG
), reg
, REG_FP
, -iv_addr(iv_rank(val
)), 0);
317 static void val_tomem(long val
, int reg
)
319 long rank
= iv_rank(val
);
320 ra_vmax
= MAX(ra_vmax
, rank
+ 1);
321 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0, reg
, REG_FP
, -iv_addr(rank
));
324 /* move the value to the stack */
325 static void ra_spill(int reg
)
327 if (ra_vmap
[reg
] >= 0) {
328 val_tomem(ra_vmap
[reg
], reg
);
331 if (ra_lmap
[reg
] >= 0) {
332 if (ra_lmap
[reg
] == reg_rmap(ic_i
, reg
))
333 loc_tomem(ra_lmap
[reg
], 0, reg
, ULNG
);
338 /* set the value to the given register */
339 static void ra_vsave(long iv
, int reg
)
343 for (i
= 0; i
< LEN(ra_live
); i
++)
346 if (i
== LEN(ra_live
))
347 die("neatcc: too many live values\n");
351 /* load the value into a register */
352 static void ra_vload(long iv
, int reg
)
354 if (ra_vmap
[reg
] == iv
)
356 if (ra_vmap
[reg
] >= 0 || ra_lmap
[reg
] >= 0)
358 if (ra_vreg(iv
) >= 0) {
359 i_ins(O_MK(O_MOV
, ULNG
), reg
, ra_vreg(iv
), 0, 0);
360 ra_vmap
[ra_vreg(iv
)] = -1;
367 /* the value is no longer needed */
368 static void ra_vdrop(long iv
)
371 for (i
= 0; i
< LEN(ra_live
); i
++)
372 if (ra_live
[i
] == iv
)
374 if (ra_vreg(iv
) >= 0)
375 ra_vmap
[ra_vreg(iv
)] = -1;
378 /* move the given value to memory or a free register */
379 static void ra_vmove(long iv
, long mask
)
381 int src
= ra_vreg(iv
);
382 int dst
= ra_regcheap(mask
);
383 if (dst
>= 0 && ra_vmap
[dst
] < 0 && ra_lmap
[dst
] < 0) {
384 i_ins(O_MK(O_MOV
, ULNG
), dst
, src
, 0, 0);
392 /* load the value of local loc into register reg */
393 static void ra_lload(long loc
, long off
, int reg
, int bt
)
395 int greg
= reg_lmap(ic_i
, loc
);
397 int lreg
= ra_lreg(loc
);
398 /* values using the same register */
399 if (ra_vmap
[greg
] >= 0)
400 ra_vmove(ra_vmap
[greg
],
401 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
405 i_ins(O_MK(O_MOV
, bt
), greg
, lreg
, 0, 0);
407 loc_toreg(loc
, off
, greg
, bt
);
411 if (ra_lreg(loc
) < 0 && reg_safe(loc
) && loc_isread(loc
)) {
413 loc_toreg(loc
, off
, reg
, bt
);
415 if (ra_lreg(loc
) >= 0) {
416 if (ra_lreg(loc
) != reg
)
417 i_ins(O_MK(O_MOV
, bt
), reg
, ra_lreg(loc
), 0, 0);
419 loc_toreg(loc
, off
, reg
, bt
);
423 /* register reg contains the value of local loc */
424 static void ra_lsave(long loc
, long off
, int reg
, int bt
)
426 int lreg
= ra_lreg(loc
);
427 int greg
= reg_lmap(ic_i
, loc
);
429 if (lreg
>= 0 && lreg
!= greg
)
431 if (ra_vmap
[greg
] >= 0) /* values using the same register */
432 ra_vmove(ra_vmap
[greg
],
433 ra_gmask
[ra_vmap
[greg
]] & ~(1 << reg
));
434 i_ins(O_MK(O_MOV
, bt
), greg
, reg
, 0, 0);
439 loc_tomem(loc
, off
, reg
, bt
);
440 if (ra_lmap
[reg
] < 0 && reg_safe(loc
) && loc_isread(loc
))
445 /* end of a basic block */
446 static void ra_bbend(void)
449 /* save values to memory */
450 for (i
= 0; i
< LEN(ra_vmap
); i
++)
453 /* dropping local caches */
454 for (i
= 0; i
< LEN(ra_lmap
); i
++)
455 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
) && ra_lmap
[i
] >= 0)
457 /* load global register allocations from memory */
458 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
459 if (ra_lmap
[i
] != reg_rmap(ic_i
, i
)) {
460 ra_lmap
[i
] = reg_rmap(ic_i
, i
);
461 loc_toreg(ra_lmap
[i
], 0, i
, ULNG
);
466 static void ra_init(struct ic
*ic
, long ic_n
)
468 long md
, m1
, m2
, m3
, mt
;
470 ic_bbeg
= calloc(ic_n
, sizeof(ic_bbeg
[0]));
471 ra_gmask
= calloc(ic_n
, sizeof(ra_gmask
[0]));
472 loc_mem
= calloc(loc_n
, sizeof(loc_mem
[0]));
474 for (i
= 0; i
< ic_n
; i
++) {
475 if (i
+ 1 < ic_n
&& ic
[i
].op
& (O_JXX
| O_RET
))
477 if (ic
[i
].op
& O_JXX
&& ic
[i
].a3
< ic_n
)
478 ic_bbeg
[ic
[i
].a3
] = 1;
481 for (i
= 0; i
< ic_n
; i
++) {
482 int n
= ic_regcnt(ic
+ i
);
484 i_reg(op
, &md
, &m1
, &m2
, &m3
, &mt
);
486 ra_gmask
[ic
[i
].a1
] = m1
;
488 ra_gmask
[ic
[i
].a2
] = m2
;
490 ra_gmask
[ic
[i
].a3
] = m3
;
491 if (O_C(op
) == (O_ST
| O_LOC
) && reg_lmap(i
, ic
[i
].a2
))
492 ra_gmask
[ic
[i
].a1
] = 1 << reg_lmap(i
, ic
[i
].a2
);
494 for (j
= 0; j
< MIN(N_ARGS
, ic
[i
].a3
); j
++)
495 ra_gmask
[ic
[i
].args
[j
]] = 1 << argregs
[j
];
498 for (i
= 0; i
< LEN(ra_vmap
); i
++)
501 for (i
= 0; i
< LEN(ra_lmap
); i
++)
502 ra_lmap
[i
] = reg_rmap(0, i
);
503 func_regs
|= reg_mask();
505 for (i
= 0; i
< LEN(ra_live
); i
++)
511 static void ra_done(void)
518 static void ic_gencode(struct ic
*ic
, long ic_n
)
523 /* loading arguments in their allocated registers */
524 for (i
= 0; i
< LEN(ra_lmap
); i
++) {
525 int loc
= ra_lmap
[i
];
526 if (loc
>= 0 && loc
< func_argc
)
527 if (loc
>= N_ARGS
|| i
!= argregs
[loc
])
528 loc_toreg(loc
, 0, i
, ULNG
);
530 /* generating code */
531 for (i
= 0; i
< ic_n
; i
++) {
534 int n
= ic_regcnt(ic
+ i
);
537 ra_map(&rd
, &r1
, &r2
, &r3
, &mt
);
540 int aregs
= MIN(N_ARGS
, argc
);
541 /* arguments passed via stack */
542 for (j
= argc
- 1; j
>= aregs
; --j
) {
543 int v
= ic
[i
].args
[j
];
544 int rx
= ra_vreg(v
) >= 0 ? ra_vreg(v
) : rd
;
546 i_ins(O_MK(O_ST
| O_NUM
, ULNG
), 0,
547 rx
, REG_SP
, (j
- aregs
) * ULNG
);
550 func_maxargs
= MAX(func_maxargs
, argc
- aregs
);
551 /* arguments passed via registers */
552 for (j
= aregs
- 1; j
>= 0; --j
)
553 ra_vload(ic
[i
].args
[j
], argregs
[j
]);
555 /* loading the operands */
557 ra_vload(ic
[i
].a1
, r1
);
559 ra_vload(ic
[i
].a2
, r2
);
561 ra_vload(ic
[i
].a3
, r3
);
562 /* dropping values that are no longer used */
563 for (j
= 0; j
< LEN(ra_live
); j
++)
564 if (ra_live
[j
] >= 0 && ic_luse
[ra_live
[j
]] <= i
)
565 ra_vdrop(ra_live
[j
]);
566 /* saving values stored in registers that may change */
567 for (j
= 0; j
< N_REGS
; j
++)
570 /* overwriting a value that is needed later (unless loading a local to its register) */
572 if (oc
!= (O_LD
| O_LOC
) || ra_lmap
[rd
] != ic
[i
].a1
||
575 /* before the last instruction of a basic block; for jumps */
576 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && oc
& O_JXX
)
578 /* performing the instruction */
580 i_ins(op
, rd
, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, 0);
582 i_ins(op
, rd
, r1
, r2
, 0);
583 if (oc
== (O_LD
| O_NUM
))
584 i_ins(op
, rd
, r1
, ic
[i
].a2
, 0);
585 if (oc
== (O_LD
| O_LOC
))
586 ra_lload(ic
[i
].a1
, ic
[i
].a2
, rd
, O_T(op
));
587 if (oc
== (O_ST
| O_NUM
))
588 i_ins(op
, 0, r1
, r2
, ic
[i
].a3
);
589 if (oc
== (O_ST
| O_LOC
))
590 ra_lsave(ic
[i
].a2
, ic
[i
].a3
, r1
, O_T(op
));
592 i_ins(op
, 0, r1
, 0, 0);
594 i_ins(op
, rd
, r1
, 0, 0);
595 if (oc
== (O_MOV
| O_NUM
))
596 i_ins(op
, rd
, ic
[i
].a1
, 0, 0);
597 if (oc
== (O_MOV
| O_LOC
))
598 loc_toadd(ic
[i
].a1
, ic
[i
].a2
, rd
);
599 if (oc
== (O_MOV
| O_SYM
))
600 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
602 i_ins(op
, rd
, r1
, 0, 0);
603 if (oc
== (O_CALL
| O_SYM
))
604 i_ins(op
, rd
, ic
[i
].a1
, ic
[i
].a2
, 0);
606 i_ins(op
, 0, 0, 0, ic
[i
].a3
);
608 i_ins(op
, 0, r1
, 0, ic
[i
].a3
);
610 i_ins(op
, 0, r1
, oc
& O_NUM
? ic
[i
].a2
: r2
, ic
[i
].a3
);
612 i_ins(op
, 0, r1
, r2
, r3
);
614 i_ins(op
, 0, r1
, r2
, r3
);
615 /* saving back the output register */
616 if (oc
& O_OUT
&& ic_luse
[i
] > i
)
618 /* after the last instruction of a basic block */
619 if (i
+ 1 < ic_n
&& ic_bbeg
[i
+ 1] && !(oc
& O_JXX
))
625 static void ic_reset(void)
636 void o_func_beg(char *name
, int argc
, int global
, int varg
)
643 for (i
= 0; i
< argc
; i
++)
644 loc_add(I_ARG0
+ -i
* ULNG
);
645 out_def(name
, (global
? OUT_GLOB
: 0) | OUT_CS
, mem_len(&cs
), 0);
648 void o_code(char *name
, char *c
, long c_len
)
650 out_def(name
, OUT_CS
, mem_len(&cs
), 0);
651 mem_put(&cs
, c
, c_len
);
654 void o_func_end(void)
658 long sargs_last
= -1;
661 long c_len
, *rsym
, *rflg
, *roff
, rcnt
;
663 int locs
= 0; /* accessing locals on the stack */
665 ic_get(&ic
, &ic_n
); /* the intermediate code */
666 reg_init(ic
, ic_n
); /* global register allocation */
667 ra_init(ic
, ic_n
); /* initialize register allocation */
668 ic_luse
= ic_lastuse(ic
, ic_n
);
669 ic_gencode(ic
, ic_n
); /* generating machine code */
671 /* deciding which arguments to save */
672 for (i
= 0; i
< func_argc
; i
++)
675 for (i
= 0; i
< N_ARGS
&& (func_varg
|| i
< sargs_last
); i
++)
676 sargs
|= 1 << argregs
[i
];
677 /* computing the amount of stack subtraction */
678 for (i
= 0; i
< loc_n
; i
++)
681 spsub
= (locs
|| ra_vmax
) ? loc_pos
+ ra_vmax
* ULNG
: 0;
682 for (i
= 0; i
< N_TMPS
; i
++)
683 if (((1 << tmpregs
[i
]) & func_regs
& R_PERM
) != 0)
686 spsub
+= func_maxargs
* ULNG
;
688 for (i
= 0; i
< ic_n
; i
++)
689 if (ic
[i
].op
& O_CALL
)
691 /* adding function prologue and epilogue */
692 i_wrap(func_argc
, sargs
, spsub
, spsub
|| locs
|| !leaf
,
693 func_regs
& R_PERM
, -sregs_pos
);
695 i_code(&c
, &c_len
, &rsym
, &rflg
, &roff
, &rcnt
);
696 for (i
= 0; i
< rcnt
; i
++) /* adding the relocations */
697 out_rel(rsym
[i
], rflg
[i
], roff
[i
] + mem_len(&cs
));
698 mem_put(&cs
, c
, c_len
); /* appending function code */
703 for (i
= 0; i
< ic_n
; i
++)
713 out_write(fd
, mem_buf(&cs
), mem_len(&cs
), mem_buf(&ds
), mem_len(&ds
));