From 7ad4908a4880523c5822d006055dc5d6db4556ea Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 5 Jun 2011 18:59:35 +0200 Subject: [PATCH] ARM: Fuse XLOAD/XSTORE operands. --- src/lj_asm_arm.h | 190 ++++++++++++++++++++++++++++++++++++++++--------------- src/lj_crecord.c | 7 ++ 2 files changed, 145 insertions(+), 52 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 74a3a927..31b300bf 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -261,6 +261,83 @@ static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) return ra_allocref(as, ref, allow); } +/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ +static IRRef asm_fuselsl2(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && + irref_isk(ir->op2) && IR(ir->op2)->i == 2) + return ir->op1; + return 0; /* No fusion. */ +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, + RegSet allow) +{ + IRIns *ir = IR(ref); + int32_t ofs = 0; + Reg base; + if (ra_noreg(ir->r) && mayfuse(as, ref)) { + int32_t lim = (ai & 0x04000000) ? 4096 : 256; + if (ir->o == IR_ADD) { + if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) { + ref = ir->op1; + } else { + IRRef lref = ir->op1, rref = ir->op2; + Reg rn, rm; + if ((ai & 0x04000000)) { + IRRef sref = asm_fuselsl2(as, rref); + if (sref) { + rref = sref; + ai |= ARMF_SH(ARMSH_LSL, 2); + } else if ((sref = asm_fuselsl2(as, lref)) != 0) { + lref = rref; + rref = sref; + ai |= ARMF_SH(ARMSH_LSL, 2); + } + } + rn = ra_alloc1(as, lref, allow); + rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); + if ((ai & 0x04000000)) ai |= ARMI_LS_R; + emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); + return; + } + } else if (ir->o == IR_STRREF) { + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs += IR(ir->op2)->i; + ref = ir->op1; + } else if (irref_isk(ir->op1)) { + ofs += IR(ir->op1)->i; + ref = ir->op2; + } else { + /* NYI: Fuse ADD with constant. */ + Reg rn = ra_alloc1(as, ir->op1, allow); + uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); + if ((ai & 0x04000000)) + emit_lso(as, ai, rd, rd, ofs); + else + emit_lsox(as, ai, rd, rd, ofs); + emit_dn(as, ARMI_ADD^m, rd, rn); + return; + } + if (ofs <= -lim || ofs >= lim) { + Reg rn = ra_alloc1(as, ref, allow); + Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); + if ((ai & 0x04000000)) ai |= ARMI_LS_R; + emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); + return; + } + } + } + base = ra_alloc1(as, ref, allow); + if ((ai & 0x04000000)) + emit_lso(as, ai, rd, base, ofs); + else + emit_lsox(as, ai, rd, base, ofs); +} + /* -- Calls --------------------------------------------------------------- */ /* Generate a call to a C function. */ @@ -749,68 +826,75 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static void asm_fxload(ASMState *as, IRIns *ir) +static ARMIns asm_fxloadins(IRIns *ir) { - Reg idx, dest = ra_dest(as, ir, RSET_GPR); - int32_t ofs; - ARMIns ai; - if (ir->o == IR_FLOAD) { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - } else { - /* NYI: Fuse xload operands. */ - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - idx = ra_alloc1(as, ir->op1, RSET_GPR); - ofs = 0; - } switch (irt_type(ir->t)) { - case IRT_I8: ai = ARMI_LDRSB; break; - case IRT_U8: ai = ARMI_LDRB; goto use_lso; - case IRT_I16: ai = ARMI_LDRSH; break; - case IRT_U16: ai = ARMI_LDRH; break; + case IRT_I8: return ARMI_LDRSB; + case IRT_U8: return ARMI_LDRB; + case IRT_I16: return ARMI_LDRSH; + case IRT_U16: return ARMI_LDRH; case IRT_NUM: lua_assert(0); case IRT_FLOAT: - default: ai = ARMI_LDR; - use_lso: - emit_lso(as, ai, dest, idx, ofs); - return; + default: return ARMI_LDR; } - emit_lsox(as, ai, dest, idx, ofs); } -static void asm_fxstore(ASMState *as, IRIns *ir) +static ARMIns asm_fxstoreins(IRIns *ir) { - Reg idx, src = ra_alloc1(as, ir->op2, RSET_GPR); - RegSet allow = rset_exclude(RSET_GPR, src); - int32_t ofs; - ARMIns ai; - if (ir->o == IR_FSTORE) { - IRIns *irf = IR(ir->op1); - idx = ra_alloc1(as, irf->op1, allow); - ofs = field_ofs[irf->op2]; - } else { - /* NYI: Fuse xstore operands. */ - idx = ra_alloc1(as, ir->op1, allow); - ofs = 0; - } switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: ai = ARMI_STRB; goto use_lso; - case IRT_I16: case IRT_U16: ai = ARMI_STRH; break; + case IRT_I8: case IRT_U8: return ARMI_STRB; + case IRT_I16: case IRT_U16: return ARMI_STRH; case IRT_NUM: lua_assert(0); case IRT_FLOAT: - default: ai = ARMI_STR; - use_lso: - emit_lso(as, ai, src, idx, ofs); - return; + default: return ARMI_STR; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); + ARMIns ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); + return; + } } - emit_lsox(as, ai, src, idx, ofs); + ofs = field_ofs[ir->op2]; + if ((ai & 0x04000000)) + emit_lso(as, ai, dest, idx, ofs); + else + emit_lsox(as, ai, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + ARMIns ai = asm_fxstoreins(ir); + if ((ai & 0x04000000)) + emit_lso(as, ai, src, idx, ofs); + else + emit_lsox(as, ai, src, idx, ofs); +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); +} + +static void asm_xstore(ASMState *as, IRIns *ir) +{ + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src)); } static void asm_ahuvload(ASMState *as, IRIns *ir) @@ -1687,11 +1771,13 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: asm_ahuvload(as, ir); break; - case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; + case IR_FLOAD: asm_fload(as, ir); break; + case IR_XLOAD: asm_xload(as, ir); break; case IR_SLOAD: asm_sload(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; + case IR_FSTORE: asm_fstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir); break; /* Allocations. */ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; diff --git a/src/lj_crecord.c b/src/lj_crecord.c index ed5e8272..8373faaa 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -526,6 +526,13 @@ again: idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); idx = crec_reassoc_ofs(J, idx, &ofs, sz); +#if LJ_TARGET_ARM + /* Hoist base add to allow fusion of shifts into operands. */ + if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) { + ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); + ofs = 0; + } +#endif idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); } -- 2.11.4.GIT