From 73680a5fc760cb39760e4bbfce1166ce75de237f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 18 Apr 2016 11:16:13 +0200 Subject: [PATCH] x86/x64: Search for exit jumps with instruction length decoder. Contributed by Peter Cawley. --- src/lj_asm_x86.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 9 deletions(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index ffd59d33..39a792c2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -2776,6 +2776,106 @@ static void asm_setup_target(ASMState *as) /* -- Trace patching ------------------------------------------------------ */ +static const uint8_t map_op1[256] = { +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, +0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +#if LJ_64 +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, +#else +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, +#if LJ_64 +0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#else +0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#endif +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, +0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, +0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, +0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 +}; + +static const uint8_t map_op2[256] = { +0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 +}; + +static uint32_t asm_x86_inslen(const uint8_t* p) +{ + uint32_t result = 0; + uint32_t prefixes = 0; + uint32_t x = map_op1[*p]; + for (;;) { + switch (x >> 4) { + case 0: return result + x + (prefixes & 4); + case 1: prefixes |= x; x = map_op1[*++p]; result++; break; + case 2: x = map_op2[*++p]; break; + case 3: p++; goto mrm; + case 4: result -= (prefixes & 2); /* fallthrough */ + case 5: return result + (x & 15); + case 6: /* Group 3. */ + if (p[1] & 0x38) return result + 2; + if ((prefixes & 2) && (x == 0x66)) return result + 4; + return result + (x & 15); + case 7: /* VEX c4/c5. */ + if (LJ_32 && p[1] < 0xc0) { + x = 2; + goto mrm; + } + if (x == 0x70) { + x = *++p & 0x1f; + result++; + if (x >= 2) { + p += 2; + result += 2; + goto mrm; + } + } + p++; + result++; + x = map_op2[*++p]; + break; + case 8: result -= (prefixes & 2); /* fallthrough */ + case 9: mrm: /* ModR/M and possibly SIB. */ + result += (x & 15); + x = *++p; + switch (x >> 6) { + case 0: if ((x & 7) == 5) return result + 4; break; + case 1: result++; break; + case 2: result += 4; break; + case 3: return result; + } + if ((x & 7) == 4) { + result++; + if (x < 0x40 && (p[1] & 7) == 5) result += 4; + } + return result; + } + } +} + /* Patch exit jumps of existing machine code to a new target. */ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) { @@ -2788,18 +2888,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ - for (; p < pe; p++) - if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { - p += LJ_64 ? 11 : 10; + for (; p < pe; p += asm_x86_inslen(p)) + if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) break; - } lua_assert(p < pe); - for (; p < pe; p++) { - if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { + for (; p < pe; p += asm_x86_inslen(p)) + if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) *(int32_t *)(p+2) = jmprel(p+6, target); - p += 5; - } - } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); lj_mcode_patch(J, mcarea, 1); } -- 2.11.4.GIT