From ed622e91125b5846895d6dcca5366b1775b47d6d Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 24 Sep 2019 21:44:03 +0300 Subject: [PATCH] [netcore] Implement missing Bmi1/Bmi2 intrinsics (#16919) * Finalize BMI1 * cleanup * Update simd-intrinsics-netcore.c * Update simd-intrinsics-netcore.c * fix LLVM-AOT --- mono/mini/mini-llvm.c | 14 +++++++++++++ mono/mini/mini-ops.h | 27 +++++++++++++----------- mono/mini/simd-intrinsics-netcore.c | 42 ++++++++++++++++++++++++++++++------- mono/mini/simd-methods-netcore.h | 4 ++-- 4 files changed, 65 insertions(+), 22 deletions(-) diff --git a/mono/mini/mini-llvm.c b/mono/mini/mini-llvm.c index f190ef43848..a99eefdf288 100644 --- a/mono/mini/mini-llvm.c +++ b/mono/mini/mini-llvm.c @@ -332,6 +332,8 @@ typedef enum { INTRINS_PDEP_I64, INTRINS_BZHI_I32, INTRINS_BZHI_I64, + INTRINS_BEXTR_I32, + INTRINS_BEXTR_I64, #if defined(TARGET_AMD64) || defined(TARGET_X86) INTRINS_SSE_PMOVMSKB, INTRINS_SSE_PSRLI_W, @@ -7465,6 +7467,14 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_CTTZ32 ? INTRINS_CTTZ_I32 : INTRINS_CTTZ_I64), args, 2, ""); break; } + case OP_BEXTR32: + case OP_BEXTR64: { + LLVMValueRef args [2]; + args [0] = lhs; + args [1] = convert (ctx, rhs, ins->opcode == OP_BEXTR32 ? LLVMInt32Type () : LLVMInt64Type ()); // cast ushort to u32/u64 + values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_BEXTR32 ? INTRINS_BEXTR_I32 : INTRINS_BEXTR_I64), args, 2, ""); + break; + } case OP_BZHI32: case OP_BZHI64: { LLVMValueRef args [2]; @@ -8742,6 +8752,8 @@ static IntrinsicDesc intrinsics[] = { {INTRINS_CTTZ_I64, "llvm.cttz.i64"}, {INTRINS_BZHI_I32, "llvm.x86.bmi.bzhi.32"}, {INTRINS_BZHI_I64, "llvm.x86.bmi.bzhi.64"}, + {INTRINS_BEXTR_I32, "llvm.x86.bmi.bextr.32"}, + {INTRINS_BEXTR_I64, "llvm.x86.bmi.bextr.64"}, {INTRINS_PEXT_I32, "llvm.x86.bmi.pext.32"}, {INTRINS_PEXT_I64, "llvm.x86.bmi.pext.64"}, {INTRINS_PDEP_I32, "llvm.x86.bmi.pdep.32"}, @@ -8951,11 +8963,13 @@ add_intrinsic (LLVMModuleRef module, int id) case INTRINS_CTTZ_I64: AddFunc2 (module, name, LLVMInt64Type (), LLVMInt64Type (), LLVMInt1Type ()); break; + case INTRINS_BEXTR_I32: case INTRINS_BZHI_I32: case INTRINS_PEXT_I32: case INTRINS_PDEP_I32: AddFunc2 (module, name, LLVMInt32Type (), LLVMInt32Type (), LLVMInt32Type ()); break; + case INTRINS_BEXTR_I64: case INTRINS_BZHI_I64: case INTRINS_PEXT_I64: case INTRINS_PDEP_I64: diff --git a/mono/mini/mini-ops.h b/mono/mini/mini-ops.h index 8b1ef689298..da2a32074b6 100644 --- a/mono/mini/mini-ops.h +++ b/mono/mini/mini-ops.h @@ -1007,6 +1007,21 @@ MINI_OP(OP_DPPS, "dpps", XREG, XREG, XREG) /* inst_c0 is the rounding mode: 0 = round, 1 = floor, 2 = ceiling */ MINI_OP(OP_SSE41_ROUNDPD, "roundpd", XREG, XREG, NONE) +/* Intel BMI1 */ +/* Count trailing zeroes, return 32/64 if the input is 0 */ +MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE) +MINI_OP(OP_CTTZ64, "cttz64", LREG, LREG, NONE) +MINI_OP(OP_BEXTR32, "bextr32", IREG, IREG, IREG) +MINI_OP(OP_BEXTR64, "bextr64", LREG, LREG, LREG) + +/* Intel BMI2 */ +MINI_OP(OP_BZHI32, "bzhi32", IREG, IREG, IREG) +MINI_OP(OP_BZHI64, "bzhi64", LREG, LREG, LREG) +MINI_OP(OP_PEXT32, "pext32", IREG, IREG, IREG) +MINI_OP(OP_PEXT64, "pext64", LREG, LREG, LREG) +MINI_OP(OP_PDEP32, "pdep32", IREG, IREG, IREG) +MINI_OP(OP_PDEP64, "pdep64", LREG, LREG, LREG) + #endif MINI_OP(OP_XMOVE, "xmove", XREG, XREG, NONE) @@ -1401,16 +1416,4 @@ MINI_OP(OP_LZCNT64, "lzcnt64", LREG, LREG, NONE) MINI_OP(OP_POPCNT32, "popcnt32", IREG, IREG, NONE) MINI_OP(OP_POPCNT64, "popcnt64", LREG, LREG, NONE) -/* Intel BMI1 */ -/* Count trailing zeroes, return 32/64 if the input is 0 */ -MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE) -MINI_OP(OP_CTTZ64, "cttz64", LREG, LREG, NONE) - -/* Intel BMI2 */ -MINI_OP(OP_BZHI32, "bzhi32", IREG, IREG, IREG) -MINI_OP(OP_BZHI64, "bzhi64", LREG, LREG, LREG) -MINI_OP(OP_PEXT32, "pext32", IREG, IREG, IREG) -MINI_OP(OP_PEXT64, "pext64", LREG, LREG, LREG) -MINI_OP(OP_PDEP32, "pdep32", IREG, IREG, IREG) -MINI_OP(OP_PDEP64, "pdep64", LREG, LREG, LREG) diff --git a/mono/mini/simd-intrinsics-netcore.c b/mono/mini/simd-intrinsics-netcore.c index 9aa1813822e..dd9620b18a6 100644 --- a/mono/mini/simd-intrinsics-netcore.c +++ b/mono/mini/simd-intrinsics-netcore.c @@ -552,6 +552,8 @@ static guint16 lzcnt_methods [] = { }; static guint16 bmi1_methods [] = { + SN_AndNot, + SN_BitFieldExtract, SN_ExtractLowestSetBit, SN_GetMaskUpToLowestSetBit, SN_ResetLowestSetBit, @@ -560,6 +562,7 @@ static guint16 bmi1_methods [] = { }; static guint16 bmi2_methods [] = { + //SN_MultiplyNoFlags, SN_ParallelBitDeposit, SN_ParallelBitExtract, SN_ZeroHighBits, @@ -629,9 +632,6 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature } } if (!strcmp (class_name, "Bmi1") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi1"))) { - // We only support the subset used by corelib - if (m_class_get_image (cfg->method->klass) != mono_get_corlib ()) - return NULL; if (!COMPILE_LLVM (cfg)) return NULL; id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod); @@ -645,6 +645,26 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0); ins->type = STACK_I4; return ins; + case SN_AndNot: { + // (a ^ -1) & b + // LLVM replaces it with `andn` + int tmp_reg = alloc_preg (cfg); + int result_reg = alloc_preg (cfg); + EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1); + EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg); + return ins; + } + case SN_BitFieldExtract: { + if (fsig->param_count == 2) { + MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32); + ins->dreg = alloc_ireg (cfg); + ins->sreg1 = args [0]->dreg; + ins->sreg2 = args [1]->dreg; + ins->type = is_64bit ? STACK_I8 : STACK_I4; + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } + } case SN_GetMaskUpToLowestSetBit: { // x ^ (x - 1) // LLVM replaces it with `blsmsk` @@ -686,7 +706,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature } } if (!strcmp (class_name, "Bmi2") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi2"))) { - // We only support the subset used by corelib + // We only support the subset used by corelib. Remove this check once MultiplyNoFlags is implemented. if (m_class_get_image (cfg->method->klass) != mono_get_corlib ()) return NULL; if (!COMPILE_LLVM (cfg)) @@ -701,7 +721,14 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0); ins->type = STACK_I4; return ins; - case SN_ZeroHighBits: + //case SN_MultiplyNoFlags: + //// TODO: implement using _mulx_u32/u64: + //// ulong MultiplyNoFlags(ulong left, ulong right) + //// ulong MultiplyNoFlags(ulong left, ulong right, ulong* low) => MultiplyNoFlags(left, right, low); + //// uint MultiplyNoFlags(uint left, uint right) + //// uint MultiplyNoFlags(uint left, uint right, uint* low) + //return NULL; + //case SN_ZeroHighBits: MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32); ins->dreg = alloc_ireg (cfg); ins->sreg1 = args [0]->dreg; @@ -714,7 +741,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature ins->dreg = alloc_ireg (cfg); ins->sreg1 = args [0]->dreg; ins->sreg2 = args [1]->dreg; - ins->type = STACK_I4; + ins->type = is_64bit ? STACK_I8 : STACK_I4; MONO_ADD_INS (cfg->cbb, ins); return ins; case SN_ParallelBitDeposit: @@ -722,13 +749,12 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature ins->dreg = alloc_ireg (cfg); ins->sreg1 = args [0]->dreg; ins->sreg2 = args [1]->dreg; - ins->type = STACK_I4; + ins->type = is_64bit ? STACK_I8 : STACK_I4; MONO_ADD_INS (cfg->cbb, ins); return ins; default: g_assert_not_reached (); } - //printf ("%s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod)); } return NULL; diff --git a/mono/mini/simd-methods-netcore.h b/mono/mini/simd-methods-netcore.h index bde5da8cc3e..4fa2af25474 100644 --- a/mono/mini/simd-methods-netcore.h +++ b/mono/mini/simd-methods-netcore.h @@ -27,8 +27,8 @@ METHOD(op_Inequality) METHOD(op_Multiply) METHOD(op_Subtraction) // BMI1 -//METHOD(AndNot) -//METHOD(BitFieldExtract) +METHOD(AndNot) +METHOD(BitFieldExtract) METHOD(ExtractLowestSetBit) METHOD(GetMaskUpToLowestSetBit) METHOD(ResetLowestSetBit) -- 2.11.4.GIT