PPC: Fuse BSWAP with XLOAD/XSTORE to lwbrx/stwbrx.
[luajit-2.0.git] / src / lj_emit_arm.h
blobea90852000bfa8fe12f11082a3199a378f87a81d
1 /*
2 ** ARM instruction emitter.
3 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4 */
6 /* -- Constant encoding --------------------------------------------------- */
8 static uint8_t emit_invai[16] = {
9 /* AND */ (ARMI_AND^ARMI_BIC) >> 21,
10 /* EOR */ 0,
11 /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21,
12 /* RSB */ 0,
13 /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21,
14 /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21,
15 /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21,
16 /* RSC */ 0,
17 /* TST */ 0,
18 /* TEQ */ 0,
19 /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21,
20 /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21,
21 /* ORR */ 0,
22 /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21,
23 /* BIC */ (ARMI_BIC^ARMI_AND) >> 21,
24 /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21
27 /* Encode constant in K12 format for data processing instructions. */
28 static uint32_t emit_isk12(ARMIns ai, int32_t n)
30 uint32_t invai, i, m = (uint32_t)n;
31 /* K12: unsigned 8 bit value, rotated in steps of two bits. */
32 for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
33 if (m <= 255) return ARMI_K12|m|i;
34 /* Otherwise try negation/complement with the inverse instruction. */
35 invai = emit_invai[((ai >> 21) & 15)];
36 if (!invai) return 0; /* Failed. No inverse instruction. */
37 m = ~(uint32_t)n;
38 if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) ||
39 invai == (ARMI_CMP^ARMI_CMN) >> 21) m++;
40 for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
41 if (m <= 255) return ARMI_K12|(invai<<21)|m|i;
42 return 0; /* Failed. */
45 /* -- Emit basic instructions --------------------------------------------- */
47 static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
49 *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm);
52 static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
54 *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm);
57 static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
59 *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn);
62 static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm)
64 *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm);
67 static void emit_d(ASMState *as, ARMIns ai, Reg rd)
69 *--as->mcp = ai | ARMF_D(rd);
72 static void emit_n(ASMState *as, ARMIns ai, Reg rn)
74 *--as->mcp = ai | ARMF_N(rn);
77 static void emit_m(ASMState *as, ARMIns ai, Reg rm)
79 *--as->mcp = ai | ARMF_M(rm);
82 static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
84 lua_assert(ofs >= -255 && ofs <= 255);
85 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
86 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
87 ((ofs & 0xf0) << 4) | (ofs & 0x0f);
90 static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
92 lua_assert(ofs >= -4095 && ofs <= 4095);
93 /* Combine LDR/STR pairs to LDRD/STRD. */
94 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
95 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
96 (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) &&
97 as->mcp != as->mcloop) {
98 as->mcp++;
99 emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4);
100 return;
102 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
103 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs;
106 /* -- Emit loads/stores --------------------------------------------------- */
/* Only refs below ASMREF_L can be rematerialized. Prefer spills of BASE/L. */
#define emit_canremat(ref)	(ASMREF_L > (ref))
111 /* Try to find a one step delta relative to another constant. */
112 static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
114 RegSet work = ~as->freeset & RSET_GPR;
115 while (work) {
116 Reg r = rset_picktop(work);
117 IRRef ref = regcost_ref(as->cost[r]);
118 lua_assert(r != d);
119 if (emit_canremat(ref)) {
120 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
121 uint32_t k = emit_isk12(ARMI_ADD, delta);
122 if (k) {
123 if (k == ARMI_K12)
124 emit_dm(as, ARMI_MOV, d, r);
125 else
126 emit_dn(as, ARMI_ADD^k, d, r);
127 return 1;
130 rset_clear(work, r);
132 return 0; /* Failed. */
135 /* Try to find a two step delta relative to another constant. */
136 static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
138 RegSet work = ~as->freeset & RSET_GPR;
139 while (work) {
140 Reg r = rset_picktop(work);
141 IRRef ref = regcost_ref(as->cost[r]);
142 lua_assert(r != d);
143 if (emit_canremat(ref)) {
144 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
145 uint32_t sh, inv = 0, k2, k;
146 if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; }
147 sh = lj_ffs(delta) & ~1;
148 k2 = emit_isk12(0, delta & (255 << sh));
149 k = emit_isk12(0, delta & ~(255 << sh));
150 if (k) {
151 emit_dn(as, ARMI_ADD^k2^inv, d, d);
152 emit_dn(as, ARMI_ADD^k^inv, d, r);
153 return 1;
156 rset_clear(work, r);
158 return 0; /* Failed. */
161 /* Load a 32 bit constant into a GPR. */
162 static void emit_loadi(ASMState *as, Reg r, int32_t i)
164 uint32_t k = emit_isk12(ARMI_MOV, i);
165 lua_assert(rset_test(as->freeset, r) || r == RID_TMP);
166 if (k) {
167 /* Standard K12 constant. */
168 emit_d(as, ARMI_MOV^k, r);
169 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
170 /* 16 bit loword constant for ARMv6T2. */
171 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
172 } else if (emit_kdelta1(as, r, i)) {
173 /* One step delta relative to another constant. */
174 } else if ((as->flags & JIT_F_ARMV6T2)) {
175 /* 32 bit hiword/loword constant for ARMv6T2. */
176 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r);
177 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
178 } else if (emit_kdelta2(as, r, i)) {
179 /* Two step delta relative to another constant. */
180 } else {
181 /* Otherwise construct the constant with up to 4 instructions. */
182 /* NYI: use mvn+bic, use pc-relative loads. */
183 for (;;) {
184 uint32_t sh = lj_ffs(i) & ~1;
185 int32_t m = i & (255 << sh);
186 i &= ~(255 << sh);
187 if (i == 0) {
188 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r);
189 break;
191 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r);
/* Load an address into a GPR, using its 32 bit representation. */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), i32ptr((addr)))
198 static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
200 /* Get/set from constant pointer. */
201 static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
203 int32_t i = i32ptr(p);
204 emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)),
205 (i & 4095));
/* Load a global_State field into register r. */
#define emit_getgl(as, r, field) \
  emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
/* Store register r to a global_State field. */
#define emit_setgl(as, r, field) \
  emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field)
/*
** Nothing to emit here: the trace number is determined from the pc of
** the exit instruction.
*/
#define emit_setvmstate(as, i)	UNUSED(i)
217 /* -- Emit control-flow instructions -------------------------------------- */
219 /* Label for internal jumps. */
220 typedef MCode *MCLabel;
/* Return label pointing to current PC (the current emit position). */
#define emit_label(as)		((as)->mcp)
225 static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
227 MCode *p = as->mcp;
228 ptrdiff_t delta = (target - p) - 1;
229 lua_assert(((delta + 0x00800000) >> 24) == 0);
230 *--p = ai | ((uint32_t)delta & 0x00ffffffu);
231 as->mcp = p;
234 static void emit_call(ASMState *as, void *target)
236 MCode *p = --as->mcp;
237 ptrdiff_t delta = ((char *)target - (char *)p) - 8;
238 if ((((delta>>2) + 0x00800000) >> 24) == 0) {
239 if ((delta & 1))
240 *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 27);
241 else
242 *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu);
243 } else { /* Target out of range: need indirect call. But don't use R0-R3. */
244 Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1));
245 *p = ARMI_BLXr | ARMF_M(r);
249 /* -- Emit generic operations --------------------------------------------- */
251 /* Generic move between two regs. */
252 static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
254 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
255 if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
256 MCode ins = *as->mcp, swp = (src^dst);
257 if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {
258 if (!((ins ^ (dst << 16)) & 0x000f0000))
259 *as->mcp = ins ^ (swp << 16); /* Swap N in load/store. */
260 if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000))
261 *as->mcp = ins ^ (swp << 12); /* Swap D in store. */
264 emit_dm(as, ARMI_MOV, dst, src);
267 /* Generic load of register from stack slot. */
268 static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
270 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
271 emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
274 /* Generic store of register to stack slot. */
275 static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
277 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
278 emit_lso(as, ARMI_STR, r, RID_SP, ofs);
281 /* Emit an arithmetic/logic operation with a constant operand. */
282 static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src,
283 int32_t i, RegSet allow)
285 uint32_t k = emit_isk12(ai, i);
286 if (k)
287 emit_dn(as, ai^k, dest, src);
288 else
289 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
292 /* Add offset to pointer. */
293 static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
295 if (ofs)
296 emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r));
/* Subtract ofs from the stack pointer. */
#define emit_spsub(as, ofs) \
  emit_addptr(as, RID_SP, -(ofs))