From 0d824b7d1417dbb89468d2b6093c2fad127fcc06 Mon Sep 17 00:00:00 2001
From: Mike Pall
Date: Mon, 7 May 2012 23:16:19 +0200
Subject: [PATCH] Fix PHI stack slot syncing.

---
 src/lj_asm.c         | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/lj_emit_arm.h    |  2 ++
 src/lj_emit_mips.h   |  6 +++++
 src/lj_emit_ppc.h    | 14 +++++++----
 src/lj_emit_x86.h    |  9 ++++++++
 src/lj_target_arm.h  |  2 ++
 src/lj_target_mips.h |  2 ++
 src/lj_target_x86.h  |  2 ++
 8 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/src/lj_asm.c b/src/lj_asm.c
index 1a78e32a..4da1a0a3 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1079,6 +1079,64 @@ static void asm_phi_shuffle(ASMState *as)
   }
 }
 
+/* Copy unsynced left/right PHI spill slots. Rarely needed. */
+static void asm_phi_copyspill(ASMState *as)
+{
+  int need = 0;  /* Bit 0: integer slots, bit 1: FP slots. */
+  IRIns *ir;
+  for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
+    if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
+      need |= irt_isfp(ir->t) ? 2 : 1;  /* Unsynced spill slot? */
+  if ((need & 1)) {  /* Copy integer spill slots. */
+#if !LJ_TARGET_X86ORX64
+    Reg r = RID_TMP;  /* Scratch register for the copies. */
+#else
+    Reg r = RID_RET;  /* Fallback if no GPR is free. */
+    if ((as->freeset & RSET_GPR))
+      r = rset_pickbot((as->freeset & RSET_GPR));
+    else  /* No free GPR: see matching emit_spstore below. */
+      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+#endif
+    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
+      if (ra_hasspill(ir->s)) {
+	IRIns *irl = IR(ir->op1);
+	if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
+	  emit_spstore(as, irl, r, sps_scale(irl->s));
+	  emit_spload(as, ir, r, sps_scale(ir->s));
+	}
+      }
+    }
+#if LJ_TARGET_X86ORX64
+    if (!rset_test(as->freeset, r))
+      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+#endif
+  }
+#if !LJ_SOFTFP
+  if ((need & 2)) {  /* Copy FP spill slots. */
+#if LJ_TARGET_X86
+    Reg r = RID_XMM0;
+#else
+    Reg r = RID_FPRET;
+#endif
+    if ((as->freeset & RSET_FPR))
+      r = rset_pickbot((as->freeset & RSET_FPR));
+    if (!rset_test(as->freeset, r))
+      emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+    for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
+      if (ra_hasspill(ir->s)) {
+	IRIns *irl = IR(ir->op1);
+	if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
+	  emit_spstore(as, irl, r, sps_scale(irl->s));
+	  emit_spload(as, ir, r, sps_scale(ir->s));
+	}
+      }
+    }
+    if (!rset_test(as->freeset, r))
+      emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
+  }
+#endif
+}
+
 /* Emit renames for left PHIs which are only spilled outside the loop. */
 static void asm_phi_fixup(ASMState *as)
 {
@@ -1132,7 +1190,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
     if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
       lj_trace_err(as->J, LJ_TRERR_NYIPHI);
     ra_spill(as, ir);
-    irl->s = irr->s = ir->s;  /* Sync left/right PHI spill slots. */
+    irr->s = ir->s;  /* Set right PHI spill slot. Sync left slot later. */
   }
 }
 
@@ -1142,6 +1200,7 @@ static void asm_loop_fixup(ASMState *as);
 /* Middle part of a loop. */
 static void asm_loop(ASMState *as)
 {
+  MCode *mcspill;
   /* LOOP is a guard, so the snapno is up to date. */
   as->loopsnapno = as->snapno;
   if (as->gcsteps)
@@ -1151,10 +1210,14 @@ static void asm_loop(ASMState *as)
   as->sectref = 0;
   if (!neverfuse(as)) as->fuseref = 0;
   asm_phi_shuffle(as);
+  mcspill = as->mcp;  /* Remember mcp before the spill slot copies. */
+  asm_phi_copyspill(as);
   asm_loop_fixup(as);
   as->mcloop = as->mcp;
   RA_DBGX((as, "===== LOOP ====="));
   if (!as->realign) RA_DBG_FLUSH();
+  if (as->mcp != mcspill)  /* mcp moved since it was recorded? */
+    emit_jmp(as, mcspill);
 }
 
 /* -- Target-specific assembler ------------------------------------------- */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 7654c19b..21ece88e 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -231,6 +231,8 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
   as->mcp = p;
 }
 
+#define emit_jmp(as, target)	emit_branch(as, ARMI_B, (target))
+
 static void emit_call(ASMState *as, void *target)
 {
   MCode *p = --as->mcp;
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 59f0640b..3edf8851 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -146,6 +146,12 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
   as->mcp = p;
 }
 
+static void emit_jmp(ASMState *as, MCode *target)
+{
+  *--as->mcp = MIPSI_NOP;  /* Presumably fills the branch delay slot. */
+  emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target));
+}
+
 static void emit_call(ASMState *as, void *target)
 {
   MCode *p = as->mcp;
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index bc361c5b..f2bf0a94 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -142,12 +142,18 @@ typedef MCode *MCLabel;
 
 static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
 {
-  MCode *p = as->mcp;
-  ptrdiff_t delta = ((char *)target - (char *)p) + 4;
+  MCode *p = --as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)p;
   lua_assert(((delta + 0x8000) >> 16) == 0);
   pi ^= (delta & 0x8000) * (PPCF_Y/0x8000);
-  *--p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
-  as->mcp = p;
+  *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
+}
+
+static void emit_jmp(ASMState *as, MCode *target)
+{
+  MCode *p = --as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)p;
+  *p = PPCI_B | (delta & 0x03fffffcu);  /* NOTE(review): no range assert. */
 }
 
 static void emit_call(ASMState *as, void *target)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 4f3a08a1..dfb70574 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -383,6 +383,15 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
   as->mcp = p - 6;
 }
 
+/* jmp target */
+static void emit_jmp(ASMState *as, MCode *target)
+{
+  MCode *p = as->mcp;
+  *(int32_t *)(p-4) = jmprel(p, target);
+  p[-5] = XI_JMP;
+  as->mcp = p - 5;  /* 1 opcode byte + 32 bit operand. */
+}
+
 /* call target */
 static void emit_call_(ASMState *as, MCode *target)
 {
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 96fc85e3..a24fc819 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -101,6 +101,8 @@ enum {
 #define SPS_FIXED	2
 #define SPS_FIRST	2
 
+#define SPOFS_TMP	0	/* Offset passed by asm_phi_copyspill(). */
+
 #define sps_scale(slot)		(4 * (int32_t)(slot))
 #define sps_align(slot)		(((slot) - SPS_FIXED + 1) & ~1)
 
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index e81d55bd..1b7727d0 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -102,6 +102,8 @@ enum {
 #define SPS_FIXED	5
 #define SPS_FIRST	4
 
+#define SPOFS_TMP	0	/* Offset passed by asm_phi_copyspill(). */
+
 #define sps_scale(slot)		(4 * (int32_t)(slot))
 #define sps_align(slot)		(((slot) - SPS_FIXED + 1) & ~1)
 
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 3de408cc..cc15490b 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -139,6 +139,8 @@ enum {
 #define SPS_FIRST	2
 #endif
 
+#define SPOFS_TMP	0	/* Offset passed by asm_phi_copyspill(). */
+
 #define sps_scale(slot)		(4 * (int32_t)(slot))
 #define sps_align(slot)		(((slot) - SPS_FIXED + 3) & ~3)
 
-- 
2.11.4.GIT