From 9608e801932b8c76f0c23e40b384a08c59b368f0 Mon Sep 17 00:00:00 2001
From: Julian Seward <jseward@acm.org>
Date: Wed, 11 Mar 2020 15:05:02 +0100
Subject: [PATCH] Bug 418702 - ARMv8.1 Paired register compare-and-swap
 instructions are not supported.

Implementation only; tests to follow.

Patch by Assad Hashmi <assad.hashmi@linaro.org>.

NOTE(review): relative to the first draft, the host-side emitter now uses
w3 (not w1) as the STXP status register -- w1/x1 must survive the loop as
the oldHi output -- the b.ne/cbnz offsets are corrected to land on out:/
loop:, and X3 is declared as clobbered in getRegUsage_ARM64Instr.
---
 VEX/priv/guest_arm64_toIR.c | 47 +++++++++++++++++++
 VEX/priv/host_arm64_defs.c  | 91 +++++++++++++++++++++++++++++++++++++
 VEX/priv/host_arm64_defs.h  |  5 ++
 VEX/priv/host_arm64_isel.c  | 45 ++++++++++++++++++
 4 files changed, 188 insertions(+)

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index c8bfd1888..fe80e593c 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -6907,6 +6907,53 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       return True;
    }
 
+   /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
+   /* 31 30 29      22 21 20 15 14    9 4
+      0  sz 0010000 A  1  s  R  11111 n t
+      CASP{,A}{,L} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>]            */
+   if (INSN(31,31) == 0
+       && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
+       && INSN(21,21) == 1
+       && INSN(14,10) == BITS5(1,1,1,1,1)) {
+      UInt is64  = INSN(30,30);
+      Bool isAcq = INSN(22,22) == 1;
+      Bool isRel = INSN(15,15) == 1;
+      UInt ss    = INSN(20,16);
+      UInt nn    = INSN(9,5);
+      UInt tt    = INSN(4,0);
+
+      if ((ss & 0x1) || (tt & 0x1)) {
+         /* Odd register numbers are CONSTRAINED UNPREDICTABLE;
+            treat as undefined and fall through to SIGILL. */
+      } else {
+         IRExpr* expLo = getIRegOrZR(is64, ss);
+         IRExpr* expHi = getIRegOrZR(is64, ss + 1);
+         IRExpr* newLo = getIRegOrZR(is64, tt);
+         IRExpr* newHi = getIRegOrZR(is64, tt + 1);
+         IRTemp  oldLo = newTemp(is64 ? Ity_I64 : Ity_I32);
+         IRTemp  oldHi = newTemp(is64 ? Ity_I64 : Ity_I32);
+
+         if (isAcq)
+            stmt(IRStmt_MBE(Imbe_Fence));
+
+         stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
+                                  Iend_LE, getIReg64orSP(nn),
+                                  expHi, expLo,
+                                  newHi, newLo)) );
+
+         if (isRel)
+            stmt(IRStmt_MBE(Imbe_Fence));
+
+         putIRegOrZR(is64, ss,   mkexpr(oldLo));
+         putIRegOrZR(is64, ss+1, mkexpr(oldHi));
+         DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
+             isAcq ? "a" : "", isRel ? "l" : "",
+             nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
+             nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
+             nameIReg64orSP(nn));
+         return True;
+      }
+   }
+
    if (sigill_diag) {
       vex_printf("ARM64 front end: load_store\n");
    }
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 13a61b0bd..e4ef56986 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1020,6 +1020,13 @@ ARM64Instr* ARM64Instr_CAS ( Int szB ) {
    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
+ARM64Instr* ARM64Instr_CASP ( Int szB ) {
+   ARM64Instr* i       = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag              = ARM64in_CASP;
+   i->ARM64in.CASP.szB = szB;
+   vassert(szB == 8 || szB == 4);
+   return i;
+}
 ARM64Instr* ARM64Instr_MFence ( void ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    i->tag        = ARM64in_MFence;
@@ -1593,6 +1600,10 @@ void ppARM64Instr ( const ARM64Instr* i ) {
          vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
          return;
       }
+      case ARM64in_CASP: {
+         vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)",
+                    8 * i->ARM64in.CASP.szB);
+         return;
+      }
       case ARM64in_MFence:
          vex_printf("(mfence) dsb sy; dmb sy; isb");
          return;
@@ -2102,6 +2113,19 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
          /* Pointless to state this since X8 is not available to RA. */
          addHRegUse(u, HRmWrite, hregARM64_X8());
          break;
+      case ARM64in_CASP:
+         addHRegUse(u, HRmRead,  hregARM64_X2());
+         addHRegUse(u, HRmRead,  hregARM64_X4());
+         addHRegUse(u, HRmRead,  hregARM64_X5());
+         addHRegUse(u, HRmRead,  hregARM64_X6());
+         addHRegUse(u, HRmRead,  hregARM64_X7());
+         addHRegUse(u, HRmWrite, hregARM64_X0());
+         addHRegUse(u, HRmWrite, hregARM64_X1());
+         /* X3 is the STXP status register; X8/X9 are scratch. */
+         addHRegUse(u, HRmWrite, hregARM64_X3());
+         addHRegUse(u, HRmWrite, hregARM64_X8());
+         addHRegUse(u, HRmWrite, hregARM64_X9());
+         break;
       case ARM64in_MFence:
          return;
       case ARM64in_ClrEX:
@@ -2372,6 +2396,8 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          return;
       case ARM64in_CAS:
          return;
+      case ARM64in_CASP:
+         return;
       case ARM64in_MFence:
          return;
       case ARM64in_ClrEX:
@@ -3910,6 +3936,71 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          *p++ = 0x35FFFF68;
          goto done;
       }
+      case ARM64in_CASP: {
+         /* Generate:
+            CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
+
+            Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
+            Xn: memory address
+                -> X2 (INPUT)
+            Xs, X(s+1): values to be compared with value read from address
+                -> X4,X5 (INPUTS)
+                -> X0,X1 (OUTPUTS) loaded from memory and compared with
+                   scratch registers X8,X9 (CLOBBERED) which contain
+                   contents of X4,X5
+            Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
+                -> X6,X7 (INPUT)
+            store status
+                -> X3 (CLOBBERED); must not alias X0/X1, which still
+                   hold the loaded old values when the loop exits
+
+              loop:
+              -- two of:
+                mov  x8, x4                // AA0403E8
+                mov  x9, x5                // AA0503E9
+                and  x8, x4, #0xFFFFFFFF   // 92407C88
+                and  x9, x5, #0xFFFFFFFF   // 92407CA9
+
+              -- one of:
+                ldxp x0,x1, [x2]           // C87F0440
+                ldxp w0,w1, [x2]           // 887F0440
+
+              -- always:
+                cmp  x0, x8                // EB08001F
+                bne  out                   // 540000A1 (b.ne #20 <out>)
+                cmp  x1, x9                // EB09003F
+                bne  out                   // 54000061 (b.ne #12 <out>)
+
+              -- one of:
+                stxp w3, x6, x7, [x2]      // C8231C46
+                stxp w3, w6, w7, [x2]      // 88231C46
+
+              -- always:
+                cbnz w3, loop              // 35FFFF03 (cbnz w3, #-32 <loop>)
+              out:
+         */
+         switch (i->ARM64in.CASP.szB) {
+            case 8: *p++ = 0xAA0403E8; *p++ = 0xAA0503E9; break;
+            case 4: *p++ = 0x92407C88; *p++ = 0x92407CA9; break;
+            default: vassert(0);
+         }
+         switch (i->ARM64in.CASP.szB) {
+            case 8: *p++ = 0xC87F0440; break;
+            case 4: *p++ = 0x887F0440; break;
+            default: vassert(0);
+         }
+         *p++ = 0xEB08001F;
+         *p++ = 0x540000A1;
+         *p++ = 0xEB09003F;
+         *p++ = 0x54000061;
+         switch (i->ARM64in.CASP.szB) {
+            case 8: *p++ = 0xC8231C46; break;
+            case 4: *p++ = 0x88231C46; break;
+            default: vassert(0);
+         }
+         *p++ = 0x35FFFF03;
+         goto done;
+      }
       case ARM64in_MFence: {
          *p++ = 0xD5033F9F; /* DSB sy */
          *p++ = 0xD5033FBF; /* DMB sy */
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 63cf2bb79..05dba7ab8 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -481,6 +481,7 @@ typedef
       ARM64in_LdrEX,
       ARM64in_StrEX,
       ARM64in_CAS,
+      ARM64in_CASP,
       ARM64in_MFence,
       ARM64in_ClrEX,
       /* ARM64in_V*: scalar ops involving vector registers */
@@ -700,6 +701,9 @@ typedef
          struct {
             Int szB; /* 1, 2, 4 or 8 */
          } CAS;
+         struct {
+            Int szB; /* 4 or 8 */
+         } CASP;
          /* Mem fence.  An insn which fences all loads and stores as
             much as possible before continuing.  On ARM64 we emit the
             sequence "dsb sy ; dmb sy ; isb sy", which is probably
@@ -946,6 +950,7 @@
 extern ARM64Instr* ARM64Instr_LdrEX  ( Int szB );
 extern ARM64Instr* ARM64Instr_StrEX  ( Int szB );
 extern ARM64Instr* ARM64Instr_CAS    ( Int szB );
+extern ARM64Instr* ARM64Instr_CASP   ( Int szB );
 extern ARM64Instr* ARM64Instr_MFence ( void );
 extern ARM64Instr* ARM64Instr_ClrEX  ( void );
 extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN,
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index d19b19fcf..2f19eab81 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4003,6 +4003,51 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          addInstr(env, ARM64Instr_MovI(rOld, rResult));
          return;
       }
+      else {
+         /* Paired register CAS, i.e. CASP */
+         UChar  sz;
+         IRCAS* cas = stmt->Ist.CAS.details;
+         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
+         switch (ty) {
+            case Ity_I64: sz = 8; break;
+            case Ity_I32: sz = 4; break;
+            default: goto unhandled_cas;
+         }
+         HReg rAddr = iselIntExpr_R(env, cas->addr);
+
+         HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
+         vassert(cas->expdHi != NULL);
+         HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
+
+         HReg rData0 = iselIntExpr_R(env, cas->dataLo);
+         vassert(cas->dataHi != NULL);
+         HReg rData1 = iselIntExpr_R(env, cas->dataHi);
+
+         addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
+
+         addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
+
+         addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
+
+         addInstr(env, ARM64Instr_CASP(sz));
+
+         HReg rResult0 = hregARM64_X0();
+         HReg rResult1 = hregARM64_X1();
+         switch (sz) {
+            case 8: break;
+            case 4: rResult0 = widen_z_32_to_64(env, rResult0);
+                    rResult1 = widen_z_32_to_64(env, rResult1);
+                    break;
+            default: vassert(0);
+         }
+         HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+         HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+         addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
+         addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
+         return;
+      }
       unhandled_cas:
       break;
    }
-- 
2.11.4.GIT