1 /* architecture-dependent code generation for ARM */
/* smaller of a and b; each argument may be evaluated twice */
#define MIN(a, b)		((a) < (b) ? (a) : (b))
/* round x up to a multiple of a; the mask trick is only correct when a is a power of two */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
/* emit the value i through oi() with a length argument of 4 */
#define oi4(i)		oi((i), 4)

#define REG_DP		10	/* data pointer register */
#define REG_TMP		12	/* temporary register */
#define REG_LR		14	/* link register */
#define REG_PC		15	/* program counter */
#define REG_RET		0	/* returned value register */
/* NOTE(review): presumably the registers handed out as temporaries, in order of preference -- confirm against the register allocator */
int tmpregs[] = {4, 5, 6, 7, 8, 9, 3, 2, 1, 0};
/* argument register numbers; i_reg() and i_wrap() use them as bit positions (1 << argregs[i]) in register masks */
int argregs[] = {0, 1, 2, 3};
30 static struct mem cs
; /* generated code */
32 /* code generation functions */
33 static void os(void *s
, int n
)
38 static char *ointbuf(long n
, int l
)
42 for (i
= 0; i
< l
; i
++) {
49 static void oi(long n
, int l
)
51 mem_put(&cs
, ointbuf(n
, l
), l
);
54 static void oi_at(long pos
, long n
, int l
)
56 mem_cpy(&cs
, pos
, ointbuf(n
, l
), l
);
59 static long opos(void)
64 /* compiled division functions; div.s contains the source */
65 static int udivdi3
[] = {
66 0xe3a02000, 0xe3a03000, 0xe1110001, 0x0a00000a,
67 0xe1b0c211, 0xe2822001, 0x5afffffc, 0xe3a0c001,
68 0xe2522001, 0x4a000004, 0xe1500211, 0x3afffffb,
69 0xe0400211, 0xe083321c, 0xeafffff8, 0xe1a01000,
70 0xe1a00003, 0xe1a0f00e,
72 static int umoddi3
[] = {
73 0xe92d4000, 0xebffffeb, 0xe1a00001, 0xe8bd8000,
75 static int divdi3
[] = {
76 0xe92d4030, 0xe1a04000, 0xe1a05001, 0xe1100000,
77 0x42600000, 0xe1110001, 0x42611000, 0xebffffe1,
78 0xe1340005, 0x42600000, 0xe1140004, 0x42611000,
81 static int moddi3
[] = {
82 0xe92d4000, 0xebfffff0, 0xe1a00001, 0xe8bd8000,
85 static long *rel_sym
; /* relocation symbols */
86 static long *rel_flg
; /* relocation flags */
87 static long *rel_off
; /* relocation offsets */
88 static long rel_n
, rel_sz
; /* relocation count */
90 static long lab_sz
; /* label count */
91 static long *lab_loc
; /* label offsets in cs */
92 static long jmp_n
, jmp_sz
; /* jump count */
93 static long *jmp_off
; /* jump offsets */
94 static long *jmp_dst
; /* jump destinations */
95 static long jmp_ret
; /* the position of the last return jmp */
97 static void lab_add(long id
)
99 while (id
>= lab_sz
) {
101 lab_sz
= MAX(128, lab_sz
* 2);
102 lab_loc
= mextend(lab_loc
, lab_n
, lab_sz
, sizeof(*lab_loc
));
104 lab_loc
[id
] = opos();
107 static void jmp_add(long off
, long dst
)
109 if (jmp_n
== jmp_sz
) {
110 jmp_sz
= MAX(128, jmp_sz
* 2);
111 jmp_off
= mextend(jmp_off
, jmp_n
, jmp_sz
, sizeof(*jmp_off
));
112 jmp_dst
= mextend(jmp_dst
, jmp_n
, jmp_sz
, sizeof(*jmp_dst
));
114 jmp_off
[jmp_n
] = off
;
115 jmp_dst
[jmp_n
] = dst
;
119 void i_label(long id
)
124 static void rel_add(long sym
, long flg
, long off
)
126 if (rel_n
== rel_sz
) {
127 rel_sz
= MAX(128, rel_sz
* 2);
128 rel_sym
= mextend(rel_sym
, rel_n
, rel_sz
, sizeof(*rel_sym
));
129 rel_flg
= mextend(rel_flg
, rel_n
, rel_sz
, sizeof(*rel_flg
));
130 rel_off
= mextend(rel_off
, rel_n
, rel_sz
, sizeof(*rel_off
));
132 rel_sym
[rel_n
] = sym
;
133 rel_flg
[rel_n
] = flg
;
134 rel_off
[rel_n
] = off
;
138 static int putdiv
= 0; /* output div/mod functions */
139 static int func_call
; /* */
141 static void i_call(long sym
, long off
);
143 static void i_div(char *func
)
147 i_call(out_sym(func
), 0);
151 static long *num_off
; /* data immediate value */
152 static long *num_sym
; /* relocation data symbol name */
153 static int num_n
, num_sz
;
155 static int pool_find(long sym
, long off
)
158 for (i
= 0; i
< num_n
; i
++)
159 if (sym
== num_sym
[i
] && off
== num_off
[i
])
161 if (num_n
== num_sz
) {
162 num_sz
= MAX(128, num_sz
* 2);
163 num_off
= mextend(num_off
, num_n
, num_sz
, sizeof(*num_off
));
164 num_sym
= mextend(num_sym
, num_n
, num_sz
, sizeof(*num_sym
));
168 return (num_n
++) << 2;
171 static int pool_num(long num
)
173 return pool_find(-1, num
);
176 static int pool_reloc(long sym
, long off
)
178 return pool_find(sym
, off
);
181 static void pool_write(void)
184 for (i
= 0; i
< num_n
; i
++) {
186 rel_add(num_sym
[i
], OUT_CS
, opos());
193 * +---------------------------------------+
194 * |COND|00|I| op |S| Rn | Rd | operand2 |
195 * +---------------------------------------+
197 * S: set condition code
199 * Rd: destination operand
201 * I=0 operand2=| shift | Rm |
202 * I=1 operand2=|rota| imm |
/*
 * Build a data-processing instruction word from its fields:
 * cond -> bits 31:28, i -> bit 25, op -> bits 24:21, s -> bit 20,
 * rn -> bits 19:16, rd -> bits 15:12.  The operand2 field (bits 11:0)
 * is left zero; callers OR the register or encoded immediate into it.
 */
#define ADD(op, rd, rn, s, i, cond) \
	(((cond) << 28) | ((i) << 25) | ((s) << 20) | \
	((op) << 21) | ((rn) << 16) | ((rd) << 12))
208 static int add_encimm(unsigned n
)
211 while (i
< 12 && (n
>> ((4 + i
) << 1)))
213 return (n
>> (i
<< 1)) | (((16 - i
) & 0x0f) << 8);
216 static unsigned add_decimm(int n
)
218 int rot
= (16 - ((n
>> 8) & 0x0f)) & 0x0f;
219 return (n
& 0xff) << (rot
<< 1);
222 static int add_rndimm(unsigned n
)
224 int rot
= (n
>> 8) & 0x0f;
230 rot
= (rot
+ 12) & 0x0f;
232 return ((num
+ 1) & 0xff) | (rot
<< 8);
235 static int opcode_add(int op
)
237 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
238 static int rx
[] = {I_ADD
, I_SUB
, I_AND
, I_ORR
, I_EOR
};
239 return rx
[op
& 0x0f];
242 static void i_add(int op
, int rd
, int rn
, int rm
)
244 oi4(ADD(opcode_add(op
), rd
, rn
, 0, 0, 14) | rm
);
247 static void i_add_imm(int op
, int rd
, int rn
, long n
)
249 oi4(ADD(opcode_add(op
), rd
, rn
, 0, 1, 14) | add_encimm(n
));
252 static void i_ldr(int l
, int rd
, int rn
, int off
, int bt
);
254 static void i_num(int rd
, long n
)
256 int enc
= add_encimm(n
);
257 if (n
== add_decimm(enc
)) {
258 oi4(ADD(I_MOV
, rd
, 0, 0, 1, 14) | enc
);
261 enc
= add_encimm(-n
- 1);
262 if (~n
== add_decimm(enc
)) {
263 oi4(ADD(I_MVN
, rd
, 0, 0, 1, 14) | enc
);
266 i_ldr(1, rd
, REG_DP
, pool_num(n
), LONGSZ
);
269 static void i_add_anyimm(int rd
, int rn
, long n
)
272 int imm
= add_encimm(neg
? -n
: n
);
273 if (imm
== add_decimm(neg
? -n
: n
)) {
274 oi4(ADD(neg
? I_SUB
: I_ADD
, rd
, rn
, 0, 1, 14) | imm
);
277 i_add(O_ADD
, rd
, rd
, rn
);
283 * +----------------------------------------+
284 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
285 * +----------------------------------------+
 * S: set condition codes
291 * I=0 operand2=| shift | Rm |
292 * I=1 operand2=|rota| imm |
/*
 * Encode an unconditional (cond = 14) multiply instruction word.
 * rd goes to bits 19:16, rn to bits 11:8 and rs to bits 3:0; bits 7:4
 * hold the fixed 1001 pattern and bits 15:12 are left zero.
 * The low register field comes from the rs argument itself, so the
 * macro does not depend on a variable named rm existing at the call site.
 */
#define MUL(rd, rn, rs)	\
	((14 << 28) | ((rd) << 16) | ((0) << 12) | ((rn) << 8) | ((9) << 4) | (rs))
297 static void i_mul(int rd
, int rn
, int rm
)
299 oi4(MUL(rd
, rn
, rm
));
302 static int opcode_set(long op
)
304 /* lt, ge, eq, ne, le, gt */
305 static int ucond
[] = {3, 2, 0, 1, 9, 8};
306 static int scond
[] = {11, 10, 0, 1, 13, 12};
308 return bt
& T_MSIGN
? scond
[op
& 0x0f] : ucond
[op
& 0x0f];
311 static void i_tst(int rn
, int rm
)
313 oi4(ADD(I_TST
, 0, rn
, 1, 0, 14) | rm
);
316 static void i_cmp(int rn
, int rm
)
318 oi4(ADD(I_CMP
, 0, rn
, 1, 0, 14) | rm
);
321 static void i_cmp_imm(int rn
, long n
)
323 oi4(ADD(I_CMP
, 0, rn
, 1, 1, 14) | add_encimm(n
));
326 static void i_set(int cond
, int rd
)
328 oi4(ADD(I_MOV
, rd
, 0, 0, 1, 14));
329 oi4(ADD(I_MOV
, rd
, 0, 0, 1, opcode_set(cond
)) | 1);
336 static int opcode_shl(long op
)
339 return O_T(op
) & T_MSIGN
? SM_ASR
: SM_LSR
;
343 static void i_shl(long op
, int rd
, int rm
, int rs
)
345 int sm
= opcode_shl(op
);
346 oi4(ADD(I_MOV
, rd
, 0, 0, 0, 14) | (rs
<< 8) | (sm
<< 5) | (1 << 4) | rm
);
349 static void i_shl_imm(long op
, int rd
, int rn
, long n
)
351 int sm
= opcode_shl(op
);
352 oi4(ADD(I_MOV
, rd
, 0, 0, 0, 14) | (n
<< 7) | (sm
<< 5) | rn
);
355 void i_mov(int rd
, int rn
)
357 oi4(ADD(I_MOV
, rd
, 0, 0, 0, 14) | rn
);
361 * single data transfer:
362 * +------------------------------------------+
363 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
364 * +------------------------------------------+
366 * I: immediate/offset
367 * P: post/pre indexing
373 * Rd: source/destination register
375 * I=0 offset=| immediate |
376 * I=1 offset=| shift | Rm |
378 * halfword and signed data transfer
379 * +----------------------------------------------+
380 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
381 * +----------------------------------------------+
383 * +----------------------------------------------+
384 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
385 * +----------------------------------------------+
/*
 * Single data transfer word with cond = 14 and bit 26 set:
 * p -> bit 24, u -> bit 23, b -> bit 22, w -> bit 21, l -> bit 20,
 * rn -> bits 19:16, rd -> bits 15:12.  The argument order
 * (l, rd, rn, b, u, p, w) is not the bit order.  The offset field is
 * left zero for the caller to OR in.
 */
#define LDR(l, rd, rn, b, u, p, w) \
	((14 << 28) | (1 << 26) | ((p) << 24) | ((b) << 22) | ((u) << 23) | \
	((w) << 21) | ((l) << 20) | ((rn) << 16) | ((rd) << 12))
/*
 * Halfword/signed transfer word with cond = 14 and bit 24 set:
 * u -> bit 23, i -> bit 22, l -> bit 20, rn -> bits 19:16,
 * rd -> bits 15:12, s -> bit 6, h -> bit 5; 9 << 4 sets bits 7 and 4.
 * The low offset/register bits are ORed in by the caller.
 */
#define LDRH(l, rd, rn, s, h, u, i) \
	((14 << 28) | (1 << 24) | ((u) << 23) | ((i) << 22) | ((l) << 20) | \
	((rn) << 16) | ((rd) << 12) | ((s) << 6) | ((h) << 5) | (9 << 4))
397 static void i_ldr(int l
, int rd
, int rn
, int off
, int bt
)
399 int b
= T_SZ(bt
) == 1;
400 int h
= T_SZ(bt
) == 2;
401 int s
= l
&& (bt
& T_MSIGN
);
402 int half
= h
|| (b
&& s
);
403 int maximm
= half
? 0x100 : 0x1000;
407 while (off
>= maximm
) {
408 int imm
= add_encimm(off
);
409 oi4(ADD(neg
? I_SUB
: I_ADD
, REG_TMP
, rn
, 0, 1, 14) | imm
);
411 off
-= add_decimm(imm
);
414 oi4(LDR(l
, rd
, rn
, b
, !neg
, 1, 0) | off
);
416 oi4(LDRH(l
, rd
, rn
, s
, h
, !neg
, 1) |
417 ((off
& 0xf0) << 4) | (off
& 0x0f));
420 static void i_sym(int rd
, long sym
, long off
)
422 int doff
= pool_reloc(sym
, off
);
423 i_ldr(1, rd
, REG_DP
, doff
, LONGSZ
);
426 static void i_neg(int rd
, int r1
)
428 oi4(ADD(I_RSB
, rd
, r1
, 0, 1, 14));
431 static void i_not(int rd
, int r1
)
433 oi4(ADD(I_MVN
, rd
, 0, 0, 0, 14) | r1
);
436 static void i_lnot(int rd
, int r1
)
/* rd = r1 & ((1 << bits) - 1) */
443 static void i_zx(int rd
, int r1
, int bits
)
446 oi4(ADD(I_AND
, rd
, r1
, 0, 1, 14) | add_encimm((1 << bits
) - 1));
448 i_shl_imm(O_SHL
, rd
, r1
, 32 - bits
);
449 i_shl_imm(O_SHR
, rd
, rd
, 32 - bits
);
453 static void i_sx(int rd
, int r1
, int bits
)
455 i_shl_imm(O_SHL
, rd
, r1
, 32 - bits
);
456 i_shl_imm(O_MK(O_SHR
, SLNG
), rd
, rd
, 32 - bits
);
461 * +-----------------------------------+
462 * |COND|101|L| offset |
463 * +-----------------------------------+
/*
 * Branch instruction word: cond -> bits 31:28, bits 27:25 = 101,
 * l -> bit 24, and a 24-bit word offset computed as ((o) - 8) >> 2.
 * o is a byte offset; NOTE(review): the -8 presumably compensates for
 * the ARM PC reading two instructions ahead -- confirm against the
 * architecture manual.
 */
#define BL(cond, l, o)	(((cond) << 28) | (5 << 25) | ((l) << 24) | \
				((((o) - 8) >> 2) & 0x00ffffff))
469 static long i_jmp(long op
, long rn
, long rm
)
472 if (O_C(op
) == O_JMP
) {
477 if (O_C(op
) & O_JZ
) {
480 oi4(BL(O_C(op
) == O_JZ
? 0 : 1, 0, 0));
483 if (O_C(op
) & O_JCC
) {
489 oi4(BL(opcode_set(op
), 0, 0));
495 static void i_memcpy(int rd
, int rs
, int rn
)
497 oi4(ADD(I_SUB
, rn
, rn
, 1, 1, 14) | 1);
499 oi4(LDR(1, REG_TMP
, rs
, 1, 1, 0, 0) | 1);
500 oi4(LDR(0, REG_TMP
, rd
, 1, 1, 0, 0) | 1);
504 static void i_memset(int rd
, int rs
, int rn
)
506 oi4(ADD(I_SUB
, rn
, rn
, 1, 1, 14) | 1);
508 oi4(LDR(0, rs
, rd
, 1, 1, 0, 0) | 1);
512 static void i_call_reg(int rd
)
514 i_mov(REG_LR
, REG_PC
);
518 static void i_call(long sym
, long off
)
520 rel_add(sym
, OUT_CS
| OUT_RLREL
| OUT_RL24
, opos());
524 int i_imm(long lim
, long n
)
526 return add_decimm(add_encimm(n
)) == n
;
529 long i_reg(long op
, long *rd
, long *r1
, long *r2
, long *tmp
)
538 *r1
= oc
& (O_NUM
| O_SYM
) ? LONGSZ
* 8 : R_TMPS
;
541 if (oc
& O_MUL
&& oc
& (O_NUM
| O_SYM
))
543 if (oc
== O_DIV
|| oc
== O_MOD
) {
545 *r1
= 1 << argregs
[0];
546 *r2
= 1 << argregs
[1];
547 *tmp
= R_TMPS
& ~R_PERM
;
553 *r2
= op
& O_NUM
? 0 : R_TMPS
;
558 *r1
= op
& O_NUM
? 0 : R_TMPS
;
561 if (oc
== O_MSET
|| oc
== O_MCPY
) {
568 *rd
= (1 << REG_RET
);
572 *rd
= (1 << REG_RET
);
573 *r1
= oc
& O_SYM
? 0 : R_TMPS
;
576 if (oc
& (O_LD
| O_ST
)) {
579 *r2
= oc
& O_NUM
? 0 : R_TMPS
;
588 *r1
= oc
& O_NUM
? 0 : R_TMPS
;
596 long i_ins(long op
, long r0
, long r1
, long r2
)
603 i_add_imm(op
, r0
, r1
, r2
);
605 i_add_anyimm(r0
, r1
, r2
);
607 i_add(op
, r0
, r1
, r2
);
612 i_shl_imm(op
, r0
, r1
, r2
);
614 i_shl(op
, r0
, r1
, r2
);
620 i_div(O_T(op
) & T_MSIGN
? "__divdi3" : "__udivdi3");
622 i_div(O_T(op
) & T_MSIGN
? "__moddi3" : "__umoddi3");
647 if (oc
== (O_CALL
| O_SYM
)) {
652 if (oc
== (O_MOV
| O_SYM
)) {
656 if (oc
== (O_MOV
| O_NUM
)) {
661 i_memset(r0
, r1
, r2
);
665 i_memcpy(r0
, r1
, r2
);
670 jmp_add(i_jmp(O_JMP
, 0, 0), 0);
673 if (oc
== (O_LD
| O_NUM
)) {
674 i_ldr(1, r0
, r1
, r2
, bt
);
677 if (oc
== (O_ST
| O_NUM
)) {
678 i_ldr(0, r0
, r1
, r2
, bt
);
682 if (T_SZ(bt
) == LONGSZ
)
686 i_sx(r0
, r1
, T_SZ(bt
) * 8);
688 i_zx(r0
, r1
, T_SZ(bt
) * 8);
693 jmp_add(i_jmp(op
, r0
, r1
), r2
+ 1);
699 void i_wrap(int argc
, long sargs
, long spsub
, int initfp
, long sregs
, long sregs_pos
)
703 long diff
; /* prologue length */
705 int nsargs
= 0; /* number of saved arguments */
706 int initdp
= num_n
> 0; /* initialize data pointer */
707 long pregs
= 1; /* registers saved in function prologue */
711 if (!initfp
&& !spsub
&& !initdp
&& !sargs
&& argc
< N_ARGS
)
713 initfp
= initfp
|| pregs
;
714 /* removing the last jmp to the epilogue */
715 if (jmp_ret
+ 4 == opos()) {
716 mem_cut(&cs
, jmp_ret
);
719 lab_add(0); /* the return label */
720 body_n
= mem_len(&cs
);
722 /* generating function prologue */
723 for (i
= 0; i
< N_ARGS
; i
++)
724 if ((1 << argregs
[i
]) & sargs
)
726 if (nsargs
& 0x1) { /* keeping stack 8-aligned */
727 for (i
= 0; i
< N_ARGS
; i
++)
728 if (!((1 << argregs
[i
]) & sargs
))
730 sargs
|= 1 << argregs
[i
];
733 oi4(0xe92d0000 | sargs
); /* stmfd sp!, {r0-r3} */
735 oi4(0xe1a0c00d); /* mov r12, sp */
736 oi4(0xe92d5c00); /* stmfd sp!, {sl, fp, ip, lr} */
739 oi4(0xe1a0b00d); /* mov fp, sp */
740 if (sregs
) { /* sregs_pos should be encoded as immediate */
741 int npos
= add_decimm(add_rndimm(add_encimm(-sregs_pos
)));
742 spsub
+= npos
+ sregs_pos
;
745 if (spsub
) { /* sub sp, sp, xx */
746 spsub
= ALIGN(spsub
, 8);
747 spsub
= add_decimm(add_rndimm(add_encimm(spsub
)));
748 oi4(0xe24dd000 | add_encimm(spsub
));
752 oi4(0xe28fa000); /* add dp, pc, xx */
754 if (sregs
) { /* saving registers */
755 oi4(0xe24bc000 | add_encimm(-sregs_pos
));
756 oi4(0xe88c0000 | sregs
); /* stmea ip, {r4-r9} */
759 mem_put(&cs
, body
, body_n
);
761 /* generating function epilogue */
762 if (sregs
) { /* restoring saved registers */
763 oi4(0xe24bc000 | add_encimm(-sregs_pos
));
764 oi4(0xe89c0000 | sregs
); /* ldmfd ip, {r4-r9} */
767 oi4(0xe89bac00); /* ldmfd fp, {sl, fp, sp, pc} */
769 oi4(0xe1a0f00e); /* mov pc, lr */
771 /* adjusting code offsets */
772 for (i
= 0; i
< rel_n
; i
++)
774 for (i
= 0; i
< jmp_n
; i
++)
776 for (i
= 0; i
< lab_sz
; i
++)
778 /* writing the data pool */
780 int dpoff
= opos() - dpadd
- 8;
781 dpoff
= add_decimm(add_rndimm(add_encimm(dpoff
)));
782 mem_putz(&cs
, dpadd
+ dpoff
+ 8 - opos());
783 /* fill data ptr addition: dp = pc + xx */
784 oi_at(dpadd
, 0xe28fa000 | add_encimm(dpoff
), 4);
789 static void i_fill(long src
, long dst
)
791 long *d
= mem_buf(&cs
) + src
;
792 long c
= (*d
& 0xff000000) | (((dst
- src
- 8) >> 2) & 0x00ffffff);
796 void i_code(char **c
, long *c_len
, long **rsym
, long **rflg
, long **roff
, long *rcnt
)
799 for (i
= 0; i
< jmp_n
; i
++) /* filling jmp destinations */
800 i_fill(jmp_off
[i
], lab_loc
[jmp_dst
[i
]]);
801 *c_len
= mem_len(&cs
);
820 o_code("__udivdi3", (void *) udivdi3
, sizeof(udivdi3
));
821 o_code("__umoddi3", (void *) umoddi3
, sizeof(umoddi3
));
822 o_code("__divdi3", (void *) divdi3
, sizeof(divdi3
));
823 o_code("__moddi3", (void *) moddi3
, sizeof(moddi3
));