From a8c274d0682b7265af7a5a9c71251d48169c9dc9 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Fri, 27 Dec 2019 15:30:21 +0100 Subject: [PATCH] Bug 413634 - ARMv8.1 arithmetic instructions are not supported Patch from Assad Hashmi . This patch adds support for AArch64 ARMv8.1 SIMD instructions: SQRDMLAH <V><d>, <V><n>, <V><m> SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> SQRDMLAH <V><d>, <V><n>, <Vm>.<Ts>[<index>] SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>] SQRDMLSH <V><d>, <V><n>, <V><m> SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> SQRDMLSH <V><d>, <V><n>, <Vm>.<Ts>[<index>] SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>] --- VEX/priv/guest_arm64_toIR.c | 218 ++++++++++++++- none/tests/arm64/Makefile.am | 7 +- none/tests/arm64/simd_v81.c | 498 +++++++++++++++++++++++++++++++++++ none/tests/arm64/simd_v81.stderr.exp | 0 none/tests/arm64/simd_v81.stdout.exp | 201 ++++++++++++++ none/tests/arm64/simd_v81.vgtest | 3 + 6 files changed, 922 insertions(+), 5 deletions(-) create mode 100644 none/tests/arm64/simd_v81.c create mode 100644 none/tests/arm64/simd_v81.stderr.exp create mode 100644 none/tests/arm64/simd_v81.stdout.exp create mode 100644 none/tests/arm64/simd_v81.vgtest diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index 513ceba81..2589ddfb5 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -8437,6 +8437,27 @@ void math_SQDMULH ( /*OUT*/IRTemp* res, assign(*res, mkexpr(*sat1q)); } +/* Generate IR for SQRDMLAH and SQRDMLSH: signedly wideningly multiply, + double, add a rounding constant, take the high half and accumulate. */ +static +void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd, + UInt size, IRTemp vD, IRTemp vN, IRTemp vM ) +{ + vassert(size == X01 || size == X10); /* s or h only */ + + /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */ + + IRTemp mul, mul_nosat, dummy; + mul = mul_nosat = dummy = IRTemp_INVALID; + math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM); + + IROp op = isAdd ? mkVecADD(size) : mkVecSUB(size); + IROp qop = isAdd ? 
mkVecQADDS(size) : mkVecQSUBS(size); + newTempsV128_2(res, res_nosat); + assign(*res, binop(qop, mkexpr(vD), mkexpr(mul))); + assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat))); +} + /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in a new temp in *res, and the Q difference pair in new temps in @@ -10328,6 +10349,59 @@ Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn) # undef INSN } +static +Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn) +{ + /* 31 29 28 23 21 20 15 10 9 4 + 01 U 11110 size 0 m opcode 1 n d + Decode fields: u,size,opcode + */ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + if (INSN(31,30) != BITS2(0,1) + || INSN(28,24) != BITS5(1,1,1,1,0) + || INSN(21,21) != 0 + || INSN(10,10) != 1) { + return False; + } + UInt bitU = INSN(29,29); + UInt size = INSN(23,22); + UInt mm = INSN(20,16); + UInt opcode = INSN(15,11); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + vassert(size < 4); + vassert(mm < 32 && nn < 32 && dd < 32); + + if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) { + /* -------- 1,xx,10000 SQRDMLAH s and h variants only -------- */ + /* -------- 1,xx,10001 SQRDMLSH s and h variants only -------- */ + if (size == X00 || size == X11) return False; + Bool isAdd = opcode == BITS5(1,0,0,0,0); + + IRTemp res, res_nosat, vD, vN, vM; + res = res_nosat = vD = vN = vM = IRTemp_INVALID; + newTempsV128_3(&vD, &vN, &vM); + assign(vD, getQReg128(dd)); + assign(vN, getQReg128(nn)); + assign(vM, getQReg128(mm)); + + math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM); + putQReg128(dd, + mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)))); + updateQCFLAGwithDifference( + math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)), + math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat))); + + const HChar arr = "bhsd"[size]; /* size is X01 (h) or X10 (s) here */ + const HChar* nm = isAdd ? 
"sqrdmlah" : "sqrdmlsh"; + DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); + return True; + } + + return False; +# undef INSN +} + static Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) @@ -10655,7 +10729,7 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn) */ # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) if (INSN(31,30) != BITS2(0,1) - || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) { + || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) { return False; } UInt bitU = INSN(29,29); @@ -10789,7 +10863,7 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn) return True; } - if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { + if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) { /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ UInt mm = 32; // invalid @@ -10823,6 +10897,45 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn) return True; } + if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) { + /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */ + /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */ + UInt mm = 32; // invalid + UInt ix = 16; // invalid + switch (size) { + case X00: + return False; // b case is not allowed + case X01: + mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; + case X10: + mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; + case X11: + return False; // d case is not allowed + default: + vassert(0); + } + vassert(size < 4); + vassert(mm < 32 && ix < 16); + Bool isAdd = opcode == BITS4(1,1,0,1); + + IRTemp res, res_nosat, vD, vN, vM; + res = res_nosat = vD = vN = vM = IRTemp_INVALID; + newTempsV128_2(&vD, &vN); + assign(vD, getQReg128(dd)); + assign(vN, getQReg128(nn)); + vM = 
math_DUP_VEC_ELEM(getQReg128(mm), size, ix); + + math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM); + IROp opZHI = mkVecZEROHIxxOFV128(size); + putQReg128(dd, unop(opZHI, mkexpr(res))); + updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI); + + const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh"; + HChar ch = size == X01 ? 'h' : 's'; + DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix); /* v%d is the element source register, mm */ + return True; + } + return False; # undef INSN } @@ -12328,6 +12441,61 @@ Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn) static +Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn) +{ + /* 31 30 29 28 23 21 20 15 14 10 9 4 + 0 Q U 01110 size 0 m 1 opcode 1 n d + Decode fields: u,size,opcode + */ +# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + if (INSN(31,31) != 0 + || INSN(28,24) != BITS5(0,1,1,1,0) + || INSN(21,21) != 0 + || INSN(15,15) != 1 + || INSN(10,10) != 1) { + return False; + } + UInt bitQ = INSN(30,30); + UInt bitU = INSN(29,29); + UInt size = INSN(23,22); + UInt mm = INSN(20,16); + UInt opcode = INSN(14,11); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + vassert(size < 4); + vassert(mm < 32 && nn < 32 && dd < 32); + + if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) { + /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */ + /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */ + if (size == X00 || size == X11) return False; + Bool isAdd = opcode == BITS4(0,0,0,0); + + IRTemp res, res_nosat, vD, vN, vM; + res = res_nosat = vD = vN = vM = IRTemp_INVALID; + newTempsV128_3(&vD, &vN, &vM); + assign(vD, getQReg128(dd)); + assign(vN, getQReg128(nn)); + assign(vM, getQReg128(mm)); + + math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM); + IROp opZHI = bitQ == 0 ? 
Iop_ZeroHI64ofV128 : Iop_INVALID; + updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI); + putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); + + const HChar* arr = nameArr_Q_SZ(bitQ, size); + const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh"; + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + + return False; +# undef INSN +} + + +static Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) { /* 31 30 29 28 23 21 16 11 9 4 @@ -13249,7 +13417,7 @@ Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn) return True; } - if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { + if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) { /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ UInt mm = 32; // invalid @@ -13285,6 +13453,46 @@ Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn) return True; } + if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) { + /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */ + /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */ + UInt mm = 32; // invalid + UInt ix = 16; // invalid + switch (size) { + case X00: + return False; // b case is not allowed + case X01: // h + mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; + case X10: // s + mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; + case X11: + return False; // d case is not allowed + default: + vassert(0); + } + vassert(mm < 32 && ix < 16); + + IRTemp res, res_nosat, vD, vN, vM; + res = res_nosat = vD = vN = vM = IRTemp_INVALID; + newTempsV128_2(&vD, &vN); + assign(vD, getQReg128(dd)); + assign(vN, getQReg128(nn)); + + vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); + Bool isAdd = opcode == BITS4(1,1,0,1); + math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM); + IROp 
opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID; + updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI); + putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); + + const HChar* arr = nameArr_Q_SZ(bitQ, size); + const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh"; + HChar ch = size == X01 ? 'h' : 's'; + DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm, + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix); + return True; + } + return False; # undef INSN } @@ -14529,6 +14737,8 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_scalar_three_same(dres, insn); if (UNLIKELY(ok)) return True; + ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn); + if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn); if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn); @@ -14539,6 +14749,8 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_three_same(dres, insn); if (UNLIKELY(ok)) return True; + ok = dis_AdvSIMD_three_same_extra(dres, insn); + if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_two_reg_misc(dres, insn); if (UNLIKELY(ok)) return True; ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn); diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am index 78a5742b2..7b3ebbdca 100644 --- a/none/tests/arm64/Makefile.am +++ b/none/tests/arm64/Makefile.am @@ -9,7 +9,8 @@ EXTRA_DIST = \ fp_and_simd.stdout.exp fp_and_simd.stderr.exp fp_and_simd.vgtest \ integer.stdout.exp integer.stderr.exp integer.vgtest \ memory.stdout.exp memory.stderr.exp memory.vgtest \ - atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest + atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \ + simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest check_PROGRAMS = \ allexec \ @@ -23,7 +24,7 @@ if BUILD_ARMV8_CRC_TESTS endif if BUILD_ARMV81_TESTS - check_PROGRAMS += atomics_v81 + 
check_PROGRAMS += atomics_v81 simd_v81 endif AM_CFLAGS += @FLAG_M64@ @@ -34,7 +35,9 @@ allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@ crc32_CFLAGS = $(AM_CFLAGS) -march=armv8-a+crc atomics_v81_CFLAGS = $(AM_CFLAGS) -march=armv8.1-a +simd_v81_CFLAGS = $(AM_CFLAGS) -march=armv8.1-a+crypto fp_and_simd_CFLAGS = $(AM_CFLAGS) -march=armv8-a+crypto integer_CFLAGS = $(AM_CFLAGS) -g -O0 -DTEST_BFM=0 fp_and_simd_LDADD = -lm +simd_v81_LDADD = -lm diff --git a/none/tests/arm64/simd_v81.c b/none/tests/arm64/simd_v81.c new file mode 100644 index 000000000..60c4c2660 --- /dev/null +++ b/none/tests/arm64/simd_v81.c @@ -0,0 +1,498 @@ +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <math.h> + +typedef unsigned char UChar; +typedef unsigned short int UShort; +typedef unsigned int UInt; +typedef signed int Int; +typedef unsigned long long int ULong; +typedef signed long long int Long; +typedef double Double; +typedef float Float; + +typedef unsigned char Bool; +#define False ((Bool)0) +#define True ((Bool)1) + +#define ITERS 1 + +typedef + enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE } + LaneTy; + +union _V128 { + UChar u8[16]; + UShort u16[8]; + UInt u32[4]; + ULong u64[2]; + Float f32[4]; + Double f64[2]; +}; +typedef union _V128 V128; + +static void setV128( V128* v, int elements, LaneTy ty, ULong val ) +{ + assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 ); + memset(v, 0, sizeof(*v)); + switch (ty) { + /* 4H or 8H */ + case TyH: + assert( val < (1 << 16) ); + assert( elements == 4 || elements == 8 ); + for ( int i = 0; i < elements; i++ ) + v->u16[i] = val; + break; + /* 2S or 4S */ + case TyS: + assert( val < (1ULL << 32) ); /* 1 << 32 would overflow int */ + assert( elements == 2 || elements == 4 ); + for ( int i = 0; i < elements; i++ ) + v->u32[i] = val; + break; + default: + printf("8B, 2D and 16B not implemented\n"); + } +} + +static void setV128_idx( V128* v, int elements, LaneTy ty, ULong val, int idx ) +{ + assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 ); + assert( idx >= 0 ); 
+ memset(v, 0, sizeof(*v)); + switch (ty) { + /* 4H or 8H */ + case TyH: + assert( val < (1 << 16) ); + assert( elements == 4 || elements == 8 ); + assert( idx < elements ); + v->u16[idx] = val; + break; + /* 2S or 4S */ + case TyS: + assert( val < (1ULL << 32) ); /* 1 << 32 would overflow int */ + assert( elements == 2 || elements == 4 ); + assert( idx < elements ); + v->u32[idx] = val; + break; + default: + printf("8B, 2D and 16B not implemented\n"); + } +} + +static inline UChar randUChar ( void ) +{ + static UInt seed = 80021; + seed = 1103515245 * seed + 12345; + return (seed >> 17) & 0xFF; +} + +/* Generates a random V128. Ensures that it contains normalised FP numbers + * when viewed as either F32x4 or F64x2, so that it is reasonable to use in FP + * test cases. + */ +static void randV128 ( /*OUT*/V128* v ) +{ + static UInt nCalls = 0, nIters = 0; + Int i; + nCalls++; + while (1) { + nIters++; + for (i = 0; i < 16; i++) { + v->u8[i] = randUChar(); + } + if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2]) + && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1])) + break; + } + if (0 == (nCalls & 0xFF)) + printf("randV128: %u calls, %u iters\n", nCalls, nIters); +} + +static void showV128 ( V128* v ) +{ + Int i; + for (i = 15; i >= 0; i--) + printf("%02x", (Int)v->u8[i]); +} + +/* Generate a test function that involves three vector regs with no bias as + * towards which is input or output. 
+ */ +#define GEN_THREEVEC_TEST_RND(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \ + __attribute__((noinline)) \ + static void test_##TESTNAME ( LaneTy ty ) { \ + Int i; \ + for (i = 0; i < ITERS; i++) { \ + V128 block[6+1]; \ + memset(block, 0x55, sizeof(block)); \ + randV128(&block[0]); \ + randV128(&block[1]); \ + randV128(&block[2]); \ + randV128(&block[3]); \ + randV128(&block[4]); \ + randV128(&block[5]); \ + __asm__ __volatile__( \ + "mov x30, #0 ; msr fpsr, x30 ; " \ + "ldr q"#VECREG1NO", [%0, #0] ; " \ + "ldr q"#VECREG2NO", [%0, #16] ; " \ + "ldr q"#VECREG3NO", [%0, #32] ; " \ + INSN " ; " \ + "str q"#VECREG1NO", [%0, #48] ; " \ + "str q"#VECREG2NO", [%0, #64] ; " \ + "str q"#VECREG3NO", [%0, #80] ; " \ + "mrs x30, fpsr ; str x30, [%0, #96] " \ + : : "r"(&block[0]) \ + : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, "x30" \ + ); \ + printf("%-34s", INSN); \ + UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \ + printf("vout:"); showV128(&block[0]); printf(" "); \ + printf("vin0:"); showV128(&block[1]); printf(" "); \ + printf("vin1:"); showV128(&block[2]); \ + printf("\n "); \ + printf("vout:"); showV128(&block[3]); printf(" "); \ + printf("vin0:"); showV128(&block[4]); printf(" "); \ + printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \ + } \ + } + +/* Generate a test function that involves three vector regs with no bias as + * towards which is input or output. The three vector regs are loaded with user + * defined values rather than generated using randV128(). 
+ */ +#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREGOUT,VECREGIN0,VECREGIN1) \ + __attribute__((noinline)) \ + static void test_##TESTNAME ( V128* vout, V128* vin0, V128* vin1, LaneTy ty ) { \ + V128 block[6+1]; \ + memset(block, 0, sizeof(block)); \ + block[0] = *vout; \ + block[1] = *vin0; \ + block[2] = *vin1; \ + __asm__ __volatile__( \ + "mov x30, #0 ; msr fpsr, x30 ; " \ + "ldr q"#VECREGOUT", [%0, #0] ; " \ + "ldr q"#VECREGIN0", [%0, #16] ; " \ + "ldr q"#VECREGIN1", [%0, #32] ; " \ + INSN " ; " \ + "str q"#VECREGOUT", [%0, #48] ; " \ + "str q"#VECREGIN0", [%0, #64] ; " \ + "str q"#VECREGIN1", [%0, #80] ; " \ + "mrs x30, fpsr ; str x30, [%0, #96] " \ + : : "r"(&block[0]) \ + : "memory", "v"#VECREGOUT, "v"#VECREGIN0, "v"#VECREGIN1, "x30" \ + ); \ + printf("%-34s", INSN); \ + UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \ + printf("vout:"); showV128(&block[0]); printf(" "); \ + printf("vin0:"); showV128(&block[1]); printf(" "); \ + printf("vin1:"); showV128(&block[2]); \ + printf("\n "); \ + printf("vout:"); showV128(&block[3]); printf(" "); \ + printf("vin0:"); showV128(&block[4]); printf(" "); \ + printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \ + } + +/* Generate calls to test functions generated by GEN_THREEVEC_TEST defined + * above, which require user defined data. + */ +#define GEN_THREEVEC_TEST_CALLS(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN) \ + setV128(&vout, ECOUNT, ETYPE, PATTERN); \ + setV128(&vin[0], ECOUNT, ETYPE, PATTERN); \ + setV128(&vin[1], ECOUNT, ETYPE, PATTERN); \ + test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN(&vout, &vin[0], &vin[1], ETYPE); + +/* Indexed vector element variant of GEN_THREEVEC_TEST_CALLS above. 
*/ +#define GEN_THREEVEC_TEST_CALLSI(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN,IDX) \ + setV128_idx(&vout, ECOUNT, ETYPE, PATTERN, IDX); \ + setV128_idx(&vin[0], ECOUNT, ETYPE, PATTERN, IDX); \ + setV128_idx(&vin[1], ECOUNT, ETYPE, PATTERN, IDX); \ + test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN(&vout, &vin[0], &vin[1], ETYPE); + +/* Test patterns. */ +#define ALL5s_32 0x55555555ULL +#define ALLas_32 0xAAAAAAAAULL +#define ALLfs_32 0xFFFFFFFFULL +#define UP_32 0x01234567ULL +#define DOWN_32 0xFEDCBA98ULL +#define PI_32 0x31415926ULL +#define E_32 0x27182818ULL + +#define ALL5s_16 0x5555ULL +#define ALLas_16 0xAAAAULL +#define ALLfs_16 0xFFFFULL +#define UP_16 0x0123ULL +#define DOWN_16 0xFEDCULL +#define PI_16 0x3141ULL +#define E_16 0x2718ULL + + +/* --------------------------------------------------------- + * -- Tests, in the same order that they appear in main() -- + * --------------------------------------------------------- + * + * There are 4 types of test sets for each instruction: + * - vector + * - indexed vector + * - scalar + * - indexed scalar + */ + +/* sqrdmlah (vector version, no index) */ +GEN_THREEVEC_TEST_RND(sqrdmlah_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s", 0, 1, 2) + +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) 
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2) + +/* sqrdmlah (vector version, with index) */ +GEN_THREEVEC_TEST_RND(sqrdmlah_i_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h[1]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s[2]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s[3]", 0, 1, 2) + +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2) + +/* sqrdmlah (scalar version) */ +GEN_THREEVEC_TEST_RND(sqrdmlah_h0_h1_h2, "sqrdmlah h0, h1, h2", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_s0_s1_s2, "sqrdmlah s0, s1, s2", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_h3_h4_h5, "sqrdmlah h3, h4, h5", 3, 4, 5) +GEN_THREEVEC_TEST_RND(sqrdmlah_s3_s4_s5, "sqrdmlah s3, s4, s5", 3, 4, 5) +GEN_THREEVEC_TEST_RND(sqrdmlah_h6_h7_h8, "sqrdmlah h6, h7, h8", 6, 7, 8) +GEN_THREEVEC_TEST_RND(sqrdmlah_s6_s7_s8, "sqrdmlah s6, s7, s8", 6, 7, 8) +GEN_THREEVEC_TEST_RND(sqrdmlah_h9_h10_h11, "sqrdmlah h9, h10, h11", 9, 10, 11) +GEN_THREEVEC_TEST_RND(sqrdmlah_s9_s10_s11, "sqrdmlah s9, s10, s11", 9, 10, 11) +GEN_THREEVEC_TEST_RND(sqrdmlah_h12_h13_h14, "sqrdmlah h12, h13, 
h14", 12, 13, 14) +GEN_THREEVEC_TEST_RND(sqrdmlah_s12_s13_s14, "sqrdmlah s12, s13, s14", 12, 13, 14) +GEN_THREEVEC_TEST_RND(sqrdmlah_h15_h16_h17, "sqrdmlah h15, h16, h17", 15, 16, 17) +GEN_THREEVEC_TEST_RND(sqrdmlah_s15_s16_s17, "sqrdmlah s15, s16, s17", 15, 16, 17) +GEN_THREEVEC_TEST_RND(sqrdmlah_h18_h19_h20, "sqrdmlah h18, h19, h20", 18, 19, 20) +GEN_THREEVEC_TEST_RND(sqrdmlah_s18_s19_s20, "sqrdmlah s18, s19, s20", 18, 19, 20) +GEN_THREEVEC_TEST_RND(sqrdmlah_h21_h22_h23, "sqrdmlah h21, h22, h23", 21, 22, 23) +GEN_THREEVEC_TEST_RND(sqrdmlah_s21_s22_s23, "sqrdmlah s21, s22, s23", 21, 22, 23) +GEN_THREEVEC_TEST_RND(sqrdmlah_h24_h25_h26, "sqrdmlah h24, h25, h26", 24, 25, 26) +GEN_THREEVEC_TEST_RND(sqrdmlah_s24_s25_s26, "sqrdmlah s24, s25, s26", 24, 25, 26) +GEN_THREEVEC_TEST_RND(sqrdmlah_h27_h28_h29, "sqrdmlah h27, h28, h29", 27, 28, 29) +GEN_THREEVEC_TEST_RND(sqrdmlah_s27_s28_s29, "sqrdmlah s27, s28, s29", 27, 28, 29) +GEN_THREEVEC_TEST_RND(sqrdmlah_h30_h31_h0, "sqrdmlah h30, h31, h0", 30, 31, 0) +GEN_THREEVEC_TEST_RND(sqrdmlah_s30_s31_s0, "sqrdmlah s30, s31, s0", 30, 31, 0) + +/* sqrdmlah (scalar version, with index) */ +GEN_THREEVEC_TEST_RND(sqrdmlah_i_h0_h1_v2, "sqrdmlah h0, h1, v2.h[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_s0_s1_v2, "sqrdmlah s0, s1, v2.s[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_h3_h4_v5, "sqrdmlah h3, h4, v5.h[1]", 3, 4, 5) +GEN_THREEVEC_TEST_RND(sqrdmlah_i_s3_s4_v5, "sqrdmlah s3, s4, v5.s[1]", 3, 4, 5) + +/* sqrdmlsh (vector version) */ +GEN_THREEVEC_TEST_RND(sqrdmlsh_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s", 0, 1, 2) + +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) 
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2) + +/* sqrdmlsh (vector version, with index) */ +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h[1]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s[2]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s[3]", 0, 1, 2) + +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2) +GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2) + +/* sqrdmlsh (scalar version) */ +GEN_THREEVEC_TEST_RND(sqrdmlsh_h0_h1_h2, "sqrdmlsh h0, h1, h2", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s0_s1_s2, "sqrdmlsh s0, s1, s2", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h3_h4_h5, "sqrdmlsh h3, h4, h5", 3, 4, 5) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s3_s4_s5, "sqrdmlsh s3, s4, s5", 3, 4, 5) 
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h6_h7_h8, "sqrdmlsh h6, h7, h8", 6, 7, 8) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s6_s7_s8, "sqrdmlsh s6, s7, s8", 6, 7, 8) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h9_h10_h11, "sqrdmlsh h9, h10, h11", 9, 10, 11) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s9_s10_s11, "sqrdmlsh s9, s10, s11", 9, 10, 11) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h12_h13_h14, "sqrdmlsh h12, h13, h14", 12, 13, 14) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s12_s13_s14, "sqrdmlsh s12, s13, s14", 12, 13, 14) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h15_h16_h17, "sqrdmlsh h15, h16, h17", 15, 16, 17) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s15_s16_s17, "sqrdmlsh s15, s16, s17", 15, 16, 17) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h18_h19_h20, "sqrdmlsh h18, h19, h20", 18, 19, 20) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s18_s19_s20, "sqrdmlsh s18, s19, s20", 18, 19, 20) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h21_h22_h23, "sqrdmlsh h21, h22, h23", 21, 22, 23) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s21_s22_s23, "sqrdmlsh s21, s22, s23", 21, 22, 23) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h24_h25_h26, "sqrdmlsh h24, h25, h26", 24, 25, 26) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s24_s25_s26, "sqrdmlsh s24, s25, s26", 24, 25, 26) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h27_h28_h29, "sqrdmlsh h27, h28, h29", 27, 28, 29) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s27_s28_s29, "sqrdmlsh s27, s28, s29", 27, 28, 29) +GEN_THREEVEC_TEST_RND(sqrdmlsh_h30_h31_h0, "sqrdmlsh h30, h31, h0", 30, 31, 0) +GEN_THREEVEC_TEST_RND(sqrdmlsh_s30_s31_s0, "sqrdmlsh s30, s31, s0", 30, 31, 0) + +/* sqrdmlsh (scalar version, with index) */ +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h0_h1_v2, "sqrdmlsh h0, h1, v2.h[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s0_s1_v2, "sqrdmlsh s0, s1, v2.s[0]", 0, 1, 2) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h3_h4_v5, "sqrdmlsh h3, h4, v5.h[1]", 3, 4, 5) +GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s3_s4_v5, "sqrdmlsh s3, s4, v5.s[1]", 3, 4, 5) + +int main ( void ) +{ + assert(sizeof(V128) == 16); + + /* sqrdmlah (vector version, no index) */ + if (1) test_sqrdmlah_4h_4h_4h(TyH); + if (1) 
test_sqrdmlah_8h_8h_8h(TyH); + if (1) test_sqrdmlah_2s_2s_2s(TyS); + if (1) test_sqrdmlah_4s_4s_4s(TyS); + + V128 vout; + V128 vin[2]; + + if (1) { + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, 0); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALL5s_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLas_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLfs_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, UP_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, DOWN_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, PI_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, E_16); + } + + /* sqrdmlah (vector version, with index) */ + if (1) test_sqrdmlah_i_4h_4h_4h(TyH); + if (1) test_sqrdmlah_i_8h_8h_8h(TyH); + if (1) test_sqrdmlah_i_2s_2s_2s(TyS); + if (1) test_sqrdmlah_i_4s_4s_4s(TyS); + + if (1) { + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, 0, 0); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALL5s_16, 1); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLas_16, 2); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLfs_16, 3); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, UP_16, 0); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, DOWN_16, 1); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, PI_16, 2); + GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, E_16, 3); + } + + /* sqrdmlah (scalar version) */ + if (1) test_sqrdmlah_h0_h1_h2(TyH); + if (1) test_sqrdmlah_s0_s1_s2(TyS); + if (1) test_sqrdmlah_h3_h4_h5(TyH); + if (1) test_sqrdmlah_s3_s4_s5(TyS); + if (1) test_sqrdmlah_h6_h7_h8(TyH); + if (1) test_sqrdmlah_s6_s7_s8(TyS); + if (1) test_sqrdmlah_h9_h10_h11(TyH); + if (1) test_sqrdmlah_s9_s10_s11(TyS); + if (1) test_sqrdmlah_h12_h13_h14(TyH); + if (1) test_sqrdmlah_s12_s13_s14(TyS); + if (1) test_sqrdmlah_h15_h16_h17(TyH); + if (1) test_sqrdmlah_s15_s16_s17(TyS); + if (1) test_sqrdmlah_h18_h19_h20(TyH); + if (1) test_sqrdmlah_s18_s19_s20(TyS); + if (1) test_sqrdmlah_h21_h22_h23(TyH); + if (1) 
test_sqrdmlah_s21_s22_s23(TyS); + if (1) test_sqrdmlah_h24_h25_h26(TyH); + if (1) test_sqrdmlah_s24_s25_s26(TyS); + if (1) test_sqrdmlah_h27_h28_h29(TyH); + if (1) test_sqrdmlah_s27_s28_s29(TyS); + if (1) test_sqrdmlah_h30_h31_h0(TyH); + if (1) test_sqrdmlah_s30_s31_s0(TyS); + + /* sqrdmlah (scalar version, with index) */ + if (1) test_sqrdmlah_i_h0_h1_v2(TyH); + if (1) test_sqrdmlah_i_s0_s1_v2(TyS); + if (1) test_sqrdmlah_i_h3_h4_v5(TyH); + if (1) test_sqrdmlah_i_s3_s4_v5(TyS); + + /* sqrdmlsh (vector version) */ + if (1) test_sqrdmlsh_4h_4h_4h(TyH); + if (1) test_sqrdmlsh_8h_8h_8h(TyH); + if (1) test_sqrdmlsh_2s_2s_2s(TyS); + if (1) test_sqrdmlsh_4s_4s_4s(TyS); + + if (1) { + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, 0); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALL5s_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLas_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLfs_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, UP_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, DOWN_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, PI_16); + GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, E_16); + } + + /* sqrdmlsh (vector version, with index) */ + if (1) test_sqrdmlsh_i_4h_4h_4h(TyH); + if (1) test_sqrdmlsh_i_8h_8h_8h(TyH); + if (1) test_sqrdmlsh_i_2s_2s_2s(TyS); + if (1) test_sqrdmlsh_i_4s_4s_4s(TyS); + + if (1) { + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, 0, 0); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALL5s_16, 1); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLas_16, 2); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLfs_16, 3); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, UP_16, 0); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, DOWN_16, 1); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, PI_16, 2); + GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, E_16, 3); + } + + /* sqrdmlsh (scalar version) */ + if (1) test_sqrdmlsh_h0_h1_h2(TyH); + if (1) test_sqrdmlsh_s0_s1_s2(TyS); + if (1) 
test_sqrdmlsh_h3_h4_h5(TyH); + if (1) test_sqrdmlsh_s3_s4_s5(TyS); + if (1) test_sqrdmlsh_h6_h7_h8(TyH); + if (1) test_sqrdmlsh_s6_s7_s8(TyS); + if (1) test_sqrdmlsh_h9_h10_h11(TyH); + if (1) test_sqrdmlsh_s9_s10_s11(TyS); + if (1) test_sqrdmlsh_h12_h13_h14(TyH); + if (1) test_sqrdmlsh_s12_s13_s14(TyS); + if (1) test_sqrdmlsh_h15_h16_h17(TyH); + if (1) test_sqrdmlsh_s15_s16_s17(TyS); + if (1) test_sqrdmlsh_h18_h19_h20(TyH); + if (1) test_sqrdmlsh_s18_s19_s20(TyS); + if (1) test_sqrdmlsh_h21_h22_h23(TyH); + if (1) test_sqrdmlsh_s21_s22_s23(TyS); + if (1) test_sqrdmlsh_h24_h25_h26(TyH); + if (1) test_sqrdmlsh_s24_s25_s26(TyS); + if (1) test_sqrdmlsh_h27_h28_h29(TyH); + if (1) test_sqrdmlsh_s27_s28_s29(TyS); + if (1) test_sqrdmlsh_h30_h31_h0(TyH); + if (1) test_sqrdmlsh_s30_s31_s0(TyS); + + /* sqrdmlsh (scalar version, with index) */ + if (1) test_sqrdmlsh_i_h0_h1_v2(TyH); + if (1) test_sqrdmlsh_i_s0_s1_v2(TyS); + if (1) test_sqrdmlsh_i_h3_h4_v5(TyH); + if (1) test_sqrdmlsh_i_s3_s4_v5(TyS); + + return 0; +} diff --git a/none/tests/arm64/simd_v81.stderr.exp b/none/tests/arm64/simd_v81.stderr.exp new file mode 100644 index 000000000..e69de29bb diff --git a/none/tests/arm64/simd_v81.stdout.exp b/none/tests/arm64/simd_v81.stdout.exp new file mode 100644 index 000000000..4cd08f4f0 --- /dev/null +++ b/none/tests/arm64/simd_v81.stdout.exp @@ -0,0 +1,201 @@ +sqrdmlah v0.4h, v1.4h, v2.4h vout:5175e39d19c9ca1e98f24a4984175700 vin0:7d6528c5fa956a0d69c3e9a6af27d13b vin1:60b160857d45c48447b8d8c0eeef1e50 + vout:0000000000000000d43451248edf4bed vin0:7d6528c5fa956a0d69c3e9a6af27d13b vin1:60b160857d45c48447b8d8c0eeef1e50 fpsr=00000000 +sqrdmlah v0.8h, v1.8h, v2.8h vout:d89998df5035ed364a4bc43968bc40e5 vin0:cb509970b8136c85d740b80eb7839b97 vin1:f9dd4a29f8c093db56b01a12b0ca1583 + vout:db208000544891862eb2b5927fff3005 vin0:cb509970b8136c85d740b80eb7839b97 vin1:f9dd4a29f8c093db56b01a12b0ca1583 fpsr=08000000 +sqrdmlah v0.2s, v1.2s, v2.2s vout:d182c916cebc2e17cfaff39be272ef40 
vin0:6897b536bbe4da8a369dab4f9465b86e vin1:407b8d9035449b06f4e06e2205236eb7 + vout:0000000000000000caf0ee75de211f32 vin0:6897b536bbe4da8a369dab4f9465b86e vin1:407b8d9035449b06f4e06e2205236eb7 fpsr=00000000 +sqrdmlah v0.4s, v1.4s, v2.4s vout:f0350ca70523e0e45ba1ec54e87d39b3 vin0:0a3e0f7c75cb0842b95ed64d3b13ff64 vin1:e98ebd1ca893312a54cae7d5e13dfe91 + vout:ee6951dab4afbfa02cd832b4da4afedf vin0:0a3e0f7c75cb0842b95ed64d3b13ff64 vin1:e98ebd1ca893312a54cae7d5e13dfe91 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 + vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000005555555555555555 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 + vout:00000000000000007fff7fff7fff7fff vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 fpsr=08000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000aaaaaaaaaaaaaaaa vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa + vout:0000000000000000e38ee38ee38ee38e vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff + vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000000123012301230123 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 + vout:00000000000000000126012601260126 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000fedcfedcfedcfedc vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc + 
vout:0000000000000000fedffedffedffedf vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000003141314131413141 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 + vout:00000000000000004435443544354435 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000002718271827182718 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 + vout:00000000000000003309330933093309 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:e9b5f3f66b2e58c121a6c3476d21f1e5 vin0:63483da65c8c49d096084deb9ed0411e vin1:a81b6e33c572a86aacf29b0f395c98b4 + vout:0000000000000000772a84667fffbd58 vin0:63483da65c8c49d096084deb9ed0411e vin1:a81b6e33c572a86aacf29b0f395c98b4 fpsr=08000000 +sqrdmlah v0.8h, v1.8h, v2.8h[1] vout:6f07136773a2ead356428c5a66a2ec77 vin0:28bad218e4ebf159ff1f240eb3e1553f vin1:8404eb7f0cf4ca6fee8536da9dbf68bc + vout:4fc436a47ffff61256ef80007fffab07 vin0:28bad218e4ebf159ff1f240eb3e1553f vin1:8404eb7f0cf4ca6fee8536da9dbf68bc fpsr=08000000 +sqrdmlah v0.2s, v1.2s, v2.2s[2] vout:36b2a38dcef18acf0e0f01a829ba3c66 vin0:f078b65e01737fd22bfa8f668c8b14f4 vin1:57436a097df30b8daa927a03090dfc6d + vout:000000000000000039553356b81ed47b vin0:f078b65e01737fd22bfa8f668c8b14f4 vin1:57436a097df30b8daa927a03090dfc6d fpsr=00000000 +sqrdmlah v0.4s, v1.4s, v2.4s[3] vout:6d08ed19fa045f841810cd8c109ed568 vin0:1c4a678450562685769ab818a5b7985e vin1:b984aed62671e865e6f21d40fc7bc013 + vout:5d74fb0dcdc7dcb9d6c1ecfd425568e8 vin0:1c4a678450562685769ab818a5b7985e vin1:b984aed62671e865e6f21d40fc7bc013 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 + vout:00000000000000000000000000000000 
vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[1] vout:00000000000000000000000055550000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000 + vout:0000000000000000000000007fff0000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000 fpsr=08000000 +sqrdmlah v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000aaaa00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 + vout:00000000000000000000e38e00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[3] vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 + vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000123 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 + vout:00000000000000000000000000000126 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[1] vout:000000000000000000000000fedc0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 + vout:000000000000000000000000fedf0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000314100000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 + vout:00000000000000000000443500000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 fpsr=00000000 +sqrdmlah v0.4h, v1.4h, v2.4h[3] vout:00000000000000002718000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000 + vout:00000000000000003309000000000000 vin0:00000000000000002718000000000000 
vin1:00000000000000002718000000000000 fpsr=00000000 +sqrdmlah h0, h1, h2 vout:acb722146c6cbfa9ea4a022e1d3d7dbb vin0:048612e51a468e36c51cdd8f87e12ab4 vin1:0c05cb6ebd128663d7568e3e8a3ac80e + vout:00000000000000000000000000006b11 vin0:048612e51a468e36c51cdd8f87e12ab4 vin1:0c05cb6ebd128663d7568e3e8a3ac80e fpsr=00000000 +sqrdmlah s0, s1, s2 vout:6489eab2c96df363d52c4330a7aae391 vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713 vin1:14575775bc3a12029d8e66ea90352a18 + vout:000000000000000000000000f66e187a vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713 vin1:14575775bc3a12029d8e66ea90352a18 fpsr=00000000 +sqrdmlah h3, h4, h5 vout:4784d95987cd4ed80c3ca578a32bd88e vin0:08aebee85fda964fbba02737f3c98220 vin1:837be65197abe2686b1fba2604afb8d5 + vout:00000000000000000000000000001e8b vin0:08aebee85fda964fbba02737f3c98220 vin1:837be65197abe2686b1fba2604afb8d5 fpsr=00000000 +sqrdmlah s3, s4, s5 vout:0aaa836b194e242cc5fc3ae904033357 vin0:8ca3e752c306df00caab752f630ff07e vin1:0e780c65c22b4ab8778d9ed6d9eb46ea + vout:000000000000000000000000e68a6e36 vin0:8ca3e752c306df00caab752f630ff07e vin1:0e780c65c22b4ab8778d9ed6d9eb46ea fpsr=00000000 +sqrdmlah h6, h7, h8 vout:61ff7d4df3b6ca8131f01866bd76c58f vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2 vin1:69505d14b27d9d16f25b26e0042fa9fa + vout:000000000000000000000000000002d6 vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2 vin1:69505d14b27d9d16f25b26e0042fa9fa fpsr=00000000 +sqrdmlah s6, s7, s8 vout:0088596389c893fd879d51d4c5c764db vin0:1e61c5ec52f79c6015e3c8dc7e9273bf vin1:47086cc3da642fa7130d662777beb4a9 + vout:0000000000000000000000003c30207f vin0:1e61c5ec52f79c6015e3c8dc7e9273bf vin1:47086cc3da642fa7130d662777beb4a9 fpsr=00000000 +sqrdmlah h9, h10, h11 vout:9a49ac115048d4c4f987fa170d3ce4dd vin0:9432a2e46543b956b819f459105730e9 vin1:5da3cfd6aea6558e0c28728e28dc3c9c + vout:0000000000000000000000000000fc06 vin0:9432a2e46543b956b819f459105730e9 vin1:5da3cfd6aea6558e0c28728e28dc3c9c fpsr=00000000 +sqrdmlah s9, s10, s11 vout:e4450ababbfae0f9bc3127138b19183c 
vin0:1755377e9a786f014a6592749579b0f4 vin1:5f2619b1a20662f012305efa0acd1475 + vout:000000000000000000000000821bedee vin0:1755377e9a786f014a6592749579b0f4 vin1:5f2619b1a20662f012305efa0acd1475 fpsr=00000000 +sqrdmlah h12, h13, h14 vout:918107c43ea20cc00420edac31a0d599 vin0:5cce191e65591384ff4cb613013cc685 vin1:0194ddb82b49abf059a93d4f11d611db + vout:0000000000000000000000000000cd94 vin0:5cce191e65591384ff4cb613013cc685 vin1:0194ddb82b49abf059a93d4f11d611db fpsr=00000000 +sqrdmlah s12, s13, s14 vout:570037914d04ab3d05d75ec6f616ee9a vin0:17a0dc273ba9f8030a52741849e54740 vin1:f6f2b14fbb3184b2141625713239066f + vout:00000000000000000000000013156a40 vin0:17a0dc273ba9f8030a52741849e54740 vin1:f6f2b14fbb3184b2141625713239066f fpsr=00000000 +sqrdmlah h15, h16, h17 vout:e8c72e865de41295f2db8f44cbbf37e2 vin0:fcd015ff8f2e73a3a0fae06860b606c7 vin1:f34428d9c8833f5b78fb29445f3bc8d7 + vout:000000000000000000000000000034f6 vin0:fcd015ff8f2e73a3a0fae06860b606c7 vin1:f34428d9c8833f5b78fb29445f3bc8d7 fpsr=00000000 +sqrdmlah s15, s16, s17 vout:f9da7f07e00794eb00b0940ba5e08516 vin0:be625608d5abd787f5c90ee73af5d7c0 vin1:ac8dd5bbc503330eb9dd5dab8e212ab7 + vout:00000000000000000000000080000000 vin0:be625608d5abd787f5c90ee73af5d7c0 vin1:ac8dd5bbc503330eb9dd5dab8e212ab7 fpsr=08000000 +sqrdmlah h18, h19, h20 vout:3d3cc0784c2f856363d9810079bbabd9 vin0:125934a781e479d33d431279cce48fce vin1:d4d14e592776b1ef0b40d58cb22d00b1 + vout:0000000000000000000000000000ab3e vin0:125934a781e479d33d431279cce48fce vin1:d4d14e592776b1ef0b40d58cb22d00b1 fpsr=00000000 +sqrdmlah s18, s19, s20 vout:69f2843d15223a224edb6a053a967ecf vin0:acb9433f079dacacabeb000208c90296 vin1:20162517609f0f22a1a7a4c9c0a51f6b + vout:000000000000000000000000363d52c9 vin0:acb9433f079dacacabeb000208c90296 vin1:20162517609f0f22a1a7a4c9c0a51f6b fpsr=00000000 +sqrdmlah h21, h22, h23 vout:31005fb9ada2074bf63a63fedcb4d29c vin0:3f871736dc9ac5357446eb65e4e703bb vin1:445ef059e641a1ccb097e047aacc5b89 + vout:0000000000000000000000000000d547 
vin0:3f871736dc9ac5357446eb65e4e703bb vin1:445ef059e641a1ccb097e047aacc5b89 fpsr=00000000 +sqrdmlah s21, s22, s23 vout:4969e55289753f038f7980d1535979e5 vin0:80c745ef729f1792ccd7e987538166e1 vin1:f4ad41832c22ba116c949cea66e687ae + vout:0000000000000000000000007fffffff vin0:80c745ef729f1792ccd7e987538166e1 vin1:f4ad41832c22ba116c949cea66e687ae fpsr=08000000 +sqrdmlah h24, h25, h26 vout:e309aef8a605af130821eb96e737777e vin0:b5a9377eb31749ef710cf757885d2728 vin1:1f1030333fb8fa4b2feb05cb92ed4f4d + vout:00000000000000000000000000007fff vin0:b5a9377eb31749ef710cf757885d2728 vin1:1f1030333fb8fa4b2feb05cb92ed4f4d fpsr=08000000 +sqrdmlah s24, s25, s26 vout:928efefdf9f5ec8d5313bd01b82612e0 vin0:bc36ca100a4a3a7d5127ba1c529aa0bf vin1:9f043af6a1aed58f1ee978efa4b054d2 + vout:00000000000000000000000080000000 vin0:bc36ca100a4a3a7d5127ba1c529aa0bf vin1:9f043af6a1aed58f1ee978efa4b054d2 fpsr=08000000 +sqrdmlah h27, h28, h29 vout:2ad7482a960fb2b27014160ebbdb47e4 vin0:a7837c83faf3cb1d360794fec60222d6 vin1:61cd123e19cf1e2bb001f1161e946f5c + vout:00000000000000000000000000006633 vin0:a7837c83faf3cb1d360794fec60222d6 vin1:61cd123e19cf1e2bb001f1161e946f5c fpsr=00000000 +sqrdmlah s27, s28, s29 vout:7c4e1775412d1d47a8872cb61d8aca05 vin0:2993e139f7d64ff4532f9ae1d7da8010 vin1:19714a711ce1284318b88425f2de758f + vout:00000000000000000000000021a91e1a vin0:2993e139f7d64ff4532f9ae1d7da8010 vin1:19714a711ce1284318b88425f2de758f fpsr=00000000 +sqrdmlah h30, h31, h0 vout:3cf6fe426e1281712ef114ddd37570e8 vin0:f76b8d9773b81b24de24e0a879648e11 vin1:7af177f11da748fc8b9145fe16d0390f + vout:00000000000000000000000000003e1e vin0:f76b8d9773b81b24de24e0a879648e11 vin1:7af177f11da748fc8b9145fe16d0390f fpsr=00000000 +sqrdmlah s30, s31, s0 vout:1dd493f59184345437d5e366d0e20c30 vin0:c50f1401e45b82d3086a7a39a1e6217d vin1:3a542e238fe5d1793d1148867eb08f81 + vout:00000000000000000000000080000000 vin0:c50f1401e45b82d3086a7a39a1e6217d vin1:3a542e238fe5d1793d1148867eb08f81 fpsr=08000000 +sqrdmlah h0, h1, v2.h[0] 
vout:d4ec68f21f468712f7b8ab3708137382 vin0:478209dbbd84d92508847c7642a20df9 vin1:0b9c016be95f18de62bba1a11cc04c89 + vout:00000000000000000000000000007bdd vin0:478209dbbd84d92508847c7642a20df9 vin1:0b9c016be95f18de62bba1a11cc04c89 fpsr=00000000 +sqrdmlah s0, s1, v2.s[0] vout:1541139c8b1cd0d1a11d81326f4e7880 vin0:30c9028972f8733d11f7fa4450de2529 vin1:a1cd852d9cd970502d146432e64644c9 + vout:0000000000000000000000005f0dbde5 vin0:30c9028972f8733d11f7fa4450de2529 vin1:a1cd852d9cd970502d146432e64644c9 fpsr=00000000 +sqrdmlah h3, h4, v5.h[1] vout:94d7265949ca62b46a8a793cf9d5f0d1 vin0:35e7926e777aa43f56470887bfdd3daf vin1:b2ed4ecc1e172df2d3a0a41fce854ae7 + vout:0000000000000000000000000000d8f9 vin0:35e7926e777aa43f56470887bfdd3daf vin1:b2ed4ecc1e172df2d3a0a41fce854ae7 fpsr=00000000 +sqrdmlah s3, s4, v5.s[1] vout:09e14df041cdc14f0bf7ba2283e22a31 vin0:f0fdf0aee1dda4e888e2774acbc13287 vin1:f30110c432a534d0478d5d7e053a4e0c + vout:00000000000000000000000080000000 vin0:f0fdf0aee1dda4e888e2774acbc13287 vin1:f30110c432a534d0478d5d7e053a4e0c fpsr=08000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:62bbc77143b71e92668b24fb9133bf52 vin0:9fedb2229a090d2c018b42f3d3ec8415 vin1:6c11edd5a106e2d655f9b97953917f46 + vout:0000000000000000658249dfadfa3a89 vin0:9fedb2229a090d2c018b42f3d3ec8415 vin1:6c11edd5a106e2d655f9b97953917f46 fpsr=00000000 +sqrdmlsh v0.8h, v1.8h, v2.8h vout:bf6982b029b396ea4f1e4ed5da99d2ee vin0:7b813bf15120fbc8683cbc58f8b23fca vin1:74876ac63afb7562c67d2c86fa7c09a3 + vout:8000800004519ac87df3665eda48ce21 vin0:7b813bf15120fbc8683cbc58f8b23fca vin1:74876ac63afb7562c67d2c86fa7c09a3 fpsr=08000000 +sqrdmlsh v0.2s, v1.2s, v2.2s vout:077815d35567232e66c997070e860c39 vin0:109cfa471afbe686e2ede96f8809f947 vin1:9ce5d1a297a56adb474e1bb03bc55073 + vout:000000000000000076fb5cdb468a5f5e vin0:109cfa471afbe686e2ede96f8809f947 vin1:9ce5d1a297a56adb474e1bb03bc55073 fpsr=00000000 +sqrdmlsh v0.4s, v1.4s, v2.4s vout:2a1f00ed91e9071d79112f6f64f5079c vin0:df63bd3c7359f634f791559ff8d88161 
vin1:fba1981add7938e3067d74917c37833e + vout:2902119eb1066221797ea32c6be66494 vin0:df63bd3c7359f634f791559ff8d88161 vin1:fba1981add7938e3067d74917c37833e fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 + vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000005555555555555555 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 + vout:00000000000000001c721c721c721c72 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000aaaaaaaaaaaaaaaa vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa + vout:00000000000000008000800080008000 vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=08000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff + vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000000123012301230123 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 + vout:00000000000000000120012001200120 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000fedcfedcfedcfedc vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc + vout:0000000000000000fed9fed9fed9fed9 vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000003141314131413141 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 + vout:00000000000000001e4d1e4d1e4d1e4d 
vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000002718271827182718 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 + vout:00000000000000001b271b271b271b27 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:9cdd1a32cd007ff7daac12cf3a64acbd vin0:e76fcc086aeb0414a9cd126c0869c6a0 vin1:d973ba438b80fdb556878af3ad4a4cb8 + vout:00000000000000000e5607c4355acf20 vin0:e76fcc086aeb0414a9cd126c0869c6a0 vin1:d973ba438b80fdb556878af3ad4a4cb8 fpsr=00000000 +sqrdmlsh v0.8h, v1.8h, v2.8h[1] vout:fa0ba48e9db3d6f2c0c135e244f24dfe vin0:71a4885bc70f501cf18441c67d4b9e45 vin1:95a6e59e2a7fabcb65b86284a1cb27a3 + vout:4dae8000800011e8b618664b7fff0610 vin0:71a4885bc70f501cf18441c67d4b9e45 vin1:95a6e59e2a7fabcb65b86284a1cb27a3 fpsr=08000000 +sqrdmlsh v0.2s, v1.2s, v2.2s[2] vout:aef4eeb358364f4add55d3bb09c439c9 vin0:3028339e0d3a0c468e8f584ceae94e7a vin1:e33fad8f313a964967940f284cfce9a3 + vout:000000000000000008f6e02b11e090c9 vin0:3028339e0d3a0c468e8f584ceae94e7a vin1:e33fad8f313a964967940f284cfce9a3 fpsr=00000000 +sqrdmlsh v0.4s, v1.4s, v2.4s[3] vout:6c9a8e07714d3d2264ecfe407d2043c1 vin0:d6006035af2e8bb7b3736be34585abe2 vin1:7742a77a117513548f9ea7c3a323665c + vout:7fffffff7fffffff7fffffff3c59ca12 vin0:d6006035af2e8bb7b3736be34585abe2 vin1:7742a77a117513548f9ea7c3a323665c fpsr=08000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 + vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[1] vout:00000000000000000000000055550000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000 + vout:0000000000000000000000001c720000 vin0:00000000000000000000000055550000 
vin1:00000000000000000000000055550000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000aaaa00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 + vout:00000000000000000000800000000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 fpsr=08000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[3] vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 + vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000123 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 + vout:00000000000000000000000000000120 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[1] vout:000000000000000000000000fedc0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 + vout:000000000000000000000000fed90000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000314100000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 + vout:000000000000000000001e4d00000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 fpsr=00000000 +sqrdmlsh v0.4h, v1.4h, v2.4h[3] vout:00000000000000002718000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000 + vout:00000000000000001b27000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000 fpsr=00000000 +randV128: 256 calls, 266 iters +sqrdmlsh h0, h1, h2 vout:e70216ec5cbcf49e8a09cb539549408a vin0:182fa58322b1219295b48e6f81658922 vin1:05b265c33ff4760f125b3d3899837173 + vout:00000000000000000000000000007fff vin0:182fa58322b1219295b48e6f81658922 vin1:05b265c33ff4760f125b3d3899837173 
fpsr=08000000 +sqrdmlsh s0, s1, s2 vout:aaba95edd88623fc68d5d5d393ccbadd vin0:40947ccd307b129e244ee56d2260de8c vin1:d2b5bf6419898df003e6fe7283eff6cb + vout:000000000000000000000000b51ee109 vin0:40947ccd307b129e244ee56d2260de8c vin1:d2b5bf6419898df003e6fe7283eff6cb fpsr=00000000 +sqrdmlsh h3, h4, h5 vout:3fa5c4d84771e518605a54f56dfe15b7 vin0:ddeb80fe57ce3c26f9fcb34432fe8249 vin1:3b3296ac6d6e4ba4d95578b09e02700d + vout:00000000000000000000000000007fff vin0:ddeb80fe57ce3c26f9fcb34432fe8249 vin1:3b3296ac6d6e4ba4d95578b09e02700d fpsr=08000000 +sqrdmlsh s3, s4, s5 vout:8fbc05b829b247cac4e8bba2bda13050 vin0:98bf1ba36919393bc4d999db7390839e vin1:44d5584589abea635dc49b10189f4c14 + vout:000000000000000000000000a766456f vin0:98bf1ba36919393bc4d999db7390839e vin1:44d5584589abea635dc49b10189f4c14 fpsr=00000000 +sqrdmlsh h6, h7, h8 vout:0b0b9f6018e987aeba97106bb88dbd45 vin0:9d5fe4af824eabd8f8f577d6f4dd0223 vin1:d6c08bc57f47f9ba34279d2f35968b0a + vout:0000000000000000000000000000bf39 vin0:9d5fe4af824eabd8f8f577d6f4dd0223 vin1:d6c08bc57f47f9ba34279d2f35968b0a fpsr=00000000 +sqrdmlsh s6, s7, s8 vout:05dbe25a9a3951f70e8dc8821606fcca vin0:fe1783322bd1f4a0a92e2587172ec23f vin1:22d9446284e6ae8126fc5ee9b286181e + vout:000000000000000000000000240f31d7 vin0:fe1783322bd1f4a0a92e2587172ec23f vin1:22d9446284e6ae8126fc5ee9b286181e fpsr=00000000 +sqrdmlsh h9, h10, h11 vout:3131620a2265f8c8f64df6cdcb51c286 vin0:6eeb8d90d86668b60a08b6d0cfc59797 vin1:dc2316810c4e5ddd66c8f02281b3c8f2 + vout:0000000000000000000000000000959d vin0:6eeb8d90d86668b60a08b6d0cfc59797 vin1:dc2316810c4e5ddd66c8f02281b3c8f2 fpsr=00000000 +sqrdmlsh s9, s10, s11 vout:4210b3d32431d146a45cad2eccb0e21a vin0:a2de962ffdd15c3e50063f9610e753cd vin1:b7a39486894259f1290e68be98626e2d + vout:000000000000000000000000da5fd688 vin0:a2de962ffdd15c3e50063f9610e753cd vin1:b7a39486894259f1290e68be98626e2d fpsr=00000000 +sqrdmlsh h12, h13, h14 vout:ee7d691b146130944d3d038a0b69312c vin0:4df433720fd7245dafacd5bdced9cd88 
vin1:685c54d57186f6e2a353dba0ead5df70 + vout:00000000000000000000000000002455 vin0:4df433720fd7245dafacd5bdced9cd88 vin1:685c54d57186f6e2a353dba0ead5df70 fpsr=00000000 +sqrdmlsh s12, s13, s14 vout:e77b184466b967d624750ac67ebe825f vin0:2533f6bc813a13365b808a28feded669 vin1:a353e8d137de89d3071b5bad6b52ee61 + vout:0000000000000000000000007fb0f67c vin0:2533f6bc813a13365b808a28feded669 vin1:a353e8d137de89d3071b5bad6b52ee61 fpsr=00000000 +sqrdmlsh h15, h16, h17 vout:e11053b38ffdcd305e88d8c318f5aa57 vin0:dc9d7472c7c07dee870474bd92394516 vin1:1b8ce6e04f0e66e88ae9fdca101c70a3 + vout:00000000000000000000000000008000 vin0:dc9d7472c7c07dee870474bd92394516 vin1:1b8ce6e04f0e66e88ae9fdca101c70a3 fpsr=08000000 +sqrdmlsh s15, s16, s17 vout:913db0cc02f1b3c72ff97f68cd517cb9 vin0:850ae0642ddae0466041d5d9cb7738db vin1:2af3bd4b509e6608a513cfe482162be8 + vout:00000000000000000000000099a3f238 vin0:850ae0642ddae0466041d5d9cb7738db vin1:2af3bd4b509e6608a513cfe482162be8 fpsr=00000000 +sqrdmlsh h18, h19, h20 vout:b903f1b29f411487312d32f1bb069e61 vin0:95d26cc246074b10bda9f7bf92a71bac vin1:fcefa19f2c8a8cfd3989634f2a294a7c + vout:00000000000000000000000000008e47 vin0:95d26cc246074b10bda9f7bf92a71bac vin1:fcefa19f2c8a8cfd3989634f2a294a7c fpsr=00000000 +sqrdmlsh s18, s19, s20 vout:470818041ac5e9b218db305838ff3248 vin0:06ced856b4d04648a668c3da0fcbe652 vin1:39d4db0931b25e927a9632b68f624628 + vout:00000000000000000000000046e512d8 vin0:06ced856b4d04648a668c3da0fcbe652 vin1:39d4db0931b25e927a9632b68f624628 fpsr=00000000 +sqrdmlsh h21, h22, h23 vout:764f859cf68f4679dab3699f129680a9 vin0:fc95f5d55c34e70e2034036b2540d210 vin1:32746a5ace2a448f4d76dd08966fd815 + vout:00000000000000000000000000008000 vin0:fc95f5d55c34e70e2034036b2540d210 vin1:32746a5ace2a448f4d76dd08966fd815 fpsr=08000000 +sqrdmlsh s21, s22, s23 vout:b00b3cdf75747e60035ee161b2ddaa1e vin0:92478e7f987ac472db7137e460cce35a vin1:2915227d7d3b3371fe1c6a2981899c14 + vout:0000000000000000000000001280e25d vin0:92478e7f987ac472db7137e460cce35a 
vin1:2915227d7d3b3371fe1c6a2981899c14 fpsr=00000000 +sqrdmlsh h24, h25, h26 vout:7be936badd6630980aa27329b5b3ecd2 vin0:d2bc96d6b1a87f5bc30eedfc43f567c8 vin1:ded3251e3f2e1bf337f62011aebf77d2 + vout:00000000000000000000000000008bac vin0:d2bc96d6b1a87f5bc30eedfc43f567c8 vin1:ded3251e3f2e1bf337f62011aebf77d2 fpsr=00000000 +sqrdmlsh s24, s25, s26 vout:6c7f80e89ebd80a5e34bca20163ac21e vin0:e06c5cc8e1357d72cece7967d1f50cd5 vin1:4fd7e326d29b74541ae5bf20bcc2f9c2 + vout:000000000000000000000000fe0b135f vin0:e06c5cc8e1357d72cece7967d1f50cd5 vin1:4fd7e326d29b74541ae5bf20bcc2f9c2 fpsr=00000000 +sqrdmlsh h27, h28, h29 vout:190c026f4f4108bb97f152ac79a338e2 vin0:082a07b97ea580d954e0244c1dcf60e0 vin1:b87fb552d02120cc96fce910c815b7b5 + vout:00000000000000000000000000006f99 vin0:082a07b97ea580d954e0244c1dcf60e0 vin1:b87fb552d02120cc96fce910c815b7b5 fpsr=00000000 +sqrdmlsh s27, s28, s29 vout:35954eb164b81a015d181eb0d13422c0 vin0:fefa2b0bfdbeddb488c900901dc5368c vin1:cccf2d05af86747edec1b4c5c4fa8650 + vout:000000000000000000000000deee4fe6 vin0:fefa2b0bfdbeddb488c900901dc5368c vin1:cccf2d05af86747edec1b4c5c4fa8650 fpsr=00000000 +sqrdmlsh h30, h31, h0 vout:751dfa1352e40c98674442111330555e vin0:76df5c23d344e7279f0d2317c41d637d vin1:40c9e0a4e28cc38e27b63222a6b73935 + vout:000000000000000000000000000028e7 vin0:76df5c23d344e7279f0d2317c41d637d vin1:40c9e0a4e28cc38e27b63222a6b73935 fpsr=00000000 +sqrdmlsh s30, s31, s0 vout:23de2e6573f9f357cd2f9fc5071aba58 vin0:c8746293ddf96221a55f780d618fa50b vin1:16458560adcdd7091db23c3834cb4d4d + vout:000000000000000000000000dedd6a91 vin0:c8746293ddf96221a55f780d618fa50b vin1:16458560adcdd7091db23c3834cb4d4d fpsr=00000000 +sqrdmlsh h0, h1, v2.h[0] vout:17d247361590a45a8c419b68e9c69d73 vin0:23de85e7f3ba676cd7ca3327879cb597 vin1:9a985ec5f0031343f3185309c7b360a0 + vout:0000000000000000000000000000d59f vin0:23de85e7f3ba676cd7ca3327879cb597 vin1:9a985ec5f0031343f3185309c7b360a0 fpsr=00000000 +sqrdmlsh s0, s1, v2.s[0] vout:e2e823f1fc15de5d0fe0ad1832a0f513 
vin0:0a452b2c674cbddfcbf508515b068b9e vin1:6109ca6565cab2e77d69475df9b640b0 + vout:0000000000000000000000003719b567 vin0:0a452b2c674cbddfcbf508515b068b9e vin1:6109ca6565cab2e77d69475df9b640b0 fpsr=00000000 +sqrdmlsh h3, h4, v5.h[1] vout:ddb98a28084c634f63bfc3013161828e vin0:7e7d09937d452c872eb7cf99a14da407 vin1:94e09c4d7a2fb98594259c37dc0df227 + vout:00000000000000000000000000008000 vin0:7e7d09937d452c872eb7cf99a14da407 vin1:94e09c4d7a2fb98594259c37dc0df227 fpsr=08000000 +sqrdmlsh s3, s4, v5.s[1] vout:bc4a103eacf98853bc63f107d94d1889 vin0:348ab47fa96b098734939ce54eb5d374 vin1:e6246ae1a4f77a426cd3657964fa47a9 + vout:0000000000000000000000009661afff vin0:348ab47fa96b098734939ce54eb5d374 vin1:e6246ae1a4f77a426cd3657964fa47a9 fpsr=00000000 diff --git a/none/tests/arm64/simd_v81.vgtest b/none/tests/arm64/simd_v81.vgtest new file mode 100644 index 000000000..38549f2df --- /dev/null +++ b/none/tests/arm64/simd_v81.vgtest @@ -0,0 +1,3 @@ +prog: simd_v81 +prereq: test -x simd_v81 +vgopts: -q -- 2.11.4.GIT