From a62597cfcaf5fc9c435dd723872145cc39d65887 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 24 Nov 2020 08:23:37 +0000 Subject: [PATCH] Make sure frexp() returns correct for argument 0.0 Previous functions in gromacs have never called frexp with argument 0.0, but cbrt() needs this. This will not have caused any bugs, but since it results in FP exceptions in debug builds we want to have it fixed so our unit tests work. This change adds the correct behavior for all x86 SIMD instruction sets, and it has also been tested on all of them in both single and double, with the exception for MIC (which should be correct, but we cannot test it). The ARM SIMD flavors have not yet been fixed yet since the ARM build fails due to an apparent nblib error. Once that is fixed, I will add it in a separate change and close the issue. Refs #3773. --- .../simd/impl_arm_neon/impl_arm_neon_simd_float.h | 3 +- .../impl_arm_neon_asimd_simd_double.h | 3 +- .../simd/impl_arm_sve/impl_arm_sve_simd_double.h | 1 + .../simd/impl_arm_sve/impl_arm_sve_simd_float.h | 1 + .../simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h | 18 ++++++-- .../simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h | 13 +++++- .../simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h | 11 ++++- .../impl_reference/impl_reference_simd_double.h | 8 ++++ .../impl_reference/impl_reference_simd_float.h | 8 ++++ .../impl_x86_avx2_256_simd_double.h | 27 ++++++++---- .../impl_x86_avx2_256_simd_float.h | 22 ++++++++-- .../impl_x86_avx_256_simd_double.h | 49 +++++++++++++++------- .../impl_x86_avx_256/impl_x86_avx_256_simd_float.h | 42 ++++++++++++------- .../impl_x86_avx_512_simd_double.h | 40 ++++++++++++++++-- .../impl_x86_avx_512/impl_x86_avx_512_simd_float.h | 35 ++++++++++++++-- .../simd/impl_x86_mic/impl_x86_mic_simd_double.h | 39 ++++++++++++++--- .../simd/impl_x86_mic/impl_x86_mic_simd_float.h | 37 +++++++++++++--- .../simd/impl_x86_sse2/impl_x86_sse2_simd_double.h | 27 ++++++++---- .../simd/impl_x86_sse2/impl_x86_sse2_simd_float.h | 25 ++++++++--- src/gromacs/simd/simd_math.h | 18 +++++--- src/gromacs/simd/tests/simd.cpp | 8 ++-- src/gromacs/simd/tests/simd.h | 3 +- src/gromacs/simd/tests/simd_floatingpoint.cpp | 37 +++++++++++++--- 23 files changed, 378 insertions(+), 97 deletions(-) diff --git a/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h b/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h index 3e486a7a3e..888a4df3fd 100644 --- a/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h +++ b/src/gromacs/simd/impl_arm_neon/impl_arm_neon_simd_float.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -343,6 +343,7 @@ static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b) // Round and trunc operations are defined at the end of this file, since they // need to use float-to-integer and integer-to-float conversions. +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { const int32x4_t exponentMask = vdupq_n_s32(0x7F800000); diff --git a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h index 2315ea0a5c..ee263238be 100644 --- a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h +++ b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd_simd_double.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -307,6 +307,7 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) return { vrndq_f64(x.simdInternal_) }; } +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { const float64x2_t exponentMask = float64x2_t(vdupq_n_s64(0x7FF0000000000000LL)); diff --git a/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h b/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h index fcddae7add..04cf849e46 100644 --- a/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h +++ b/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h @@ -376,6 +376,7 @@ static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b) // Round and trunc operations are defined at the end of this file, since they // need to use double-to-integer and integer-to-double conversions. +template static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent) { svbool_t pg = svptrue_b64(); diff --git a/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h b/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h index cb0e4f60c1..ffa53071d2 100644 --- a/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h +++ b/src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h @@ -379,6 +379,7 @@ static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b) // Round and trunc operations are defined at the end of this file, since they // need to use float-to-integer and integer-to-float conversions. +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { svbool_t pg = svptrue_b32(); diff --git a/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h b/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h index 159bac73f7..4284e004bd 100644 --- a/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h +++ b/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx_simd_float.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -97,7 +97,7 @@ public: SimdFBool() {} SimdFBool(bool b) : - simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splat_u32(b ? 0xFFFFFFFF : 0))) + simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splats(b ? 0xFFFFFFFF : 0))) { } @@ -288,6 +288,7 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) return { vec_trunc(x.simdInternal_) }; } +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { // Generate constants without memory operations @@ -299,13 +300,22 @@ static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent const SimdFloat half(0.5F); __vector signed int iExponent; + __vector vmxBool int valueIsZero = + vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector float>(vec_splat_u32(0))); + iExponent = vec_and(reinterpret_cast<__vector signed int>(value.simdInternal_), exponentMask); iExponent = vec_sr(iExponent, vec_add(vec_splat_u32(15), vec_splat_u32(8))); iExponent = vec_sub(iExponent, exponentBias); + iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero)); + + __vector float result = + vec_or(vec_andc(value.simdInternal_, reinterpret_cast<__vector float>(exponentMask)), + half.simdInternal_); + result = vec_sel(result, value.simdInternal_, valueIsZero); + exponent->simdInternal_ = iExponent; - return { vec_or(vec_andc(value.simdInternal_, reinterpret_cast<__vector float>(exponentMask)), - half.simdInternal_) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h index d21cc14e39..3c6f3009b1 100644 --- a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h +++ b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h @@ -349,6 +349,7 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) return { vec_trunc(x.simdInternal_) }; } +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { const __vector double exponentMask = @@ -357,6 +358,9 @@ static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) const __vector double half = vec_splats(0.5); __vector signed int iExponent; + __vector vsxBool long long valueIsZero = + vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector double>(vec_splats(0.0))); + iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask)); // The data is in the upper half of each double (corresponding to elements 1 and 3). // First shift 52-32=20bits, and then permute to swap element 0 with 1 and element 2 with 3 @@ -366,10 +370,15 @@ static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }; iExponent = vec_perm(iExponent, iExponent, perm); #endif - iExponent = vec_sub(iExponent, exponentBias); + iExponent = vec_sub(iExponent, exponentBias); + iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero)); + + __vector double result = vec_or(vec_andc(value.simdInternal_, exponentMask), half); + result = vec_sel(result, value.simdInternal_, valueIsZero); + exponent->simdInternal_ = iExponent; - return { vec_or(vec_andc(value.simdInternal_, exponentMask), half) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h index 04504ffbde..ab8932d0fc 100644 --- a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h +++ b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h @@ -320,6 +320,7 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) return { vec_trunc(x.simdInternal_) }; } +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { const __vector float exponentMask = reinterpret_cast<__vector float>(vec_splats(0x7F800000U)); @@ -327,11 +328,19 @@ static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent const __vector float half = vec_splats(0.5F); __vector signed int iExponent; + __vector vsxBool int valueIsZero = + vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector float>(vec_splats(0.0))); + iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask)); iExponent = vec_sub(vec_sr(iExponent, vec_splats(23U)), exponentBias); + iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero)); + + __vector float result = vec_or(vec_andc(value.simdInternal_, exponentMask), half); + result = vec_sel(result, value.simdInternal_, valueIsZero); + exponent->simdInternal_ = iExponent; - return { vec_or(vec_andc(value.simdInternal_, exponentMask), half) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_reference/impl_reference_simd_double.h b/src/gromacs/simd/impl_reference/impl_reference_simd_double.h index d8a698e393..6640eb82f2 100644 --- a/src/gromacs/simd/impl_reference/impl_reference_simd_double.h +++ b/src/gromacs/simd/impl_reference/impl_reference_simd_double.h @@ -814,10 +814,18 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble a) /*! \brief Extract (integer) exponent and fraction from double precision SIMD. * + * \tparam opt By default this function behaves like the standard + * library such that frexp(+-0,exp) returns +-0 and + * stores 0 in the exponent when value is 0. If you + * know the argument is always nonzero, you can set + * the template parameter to MathOptimization::Unsafe + * to make it slightly faster. + * * \param value Floating-point value to extract from * \param[out] exponent Returned exponent of value, integer SIMD format. * \return Fraction of value, floating-point SIMD format. */ +template static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent) { SimdDouble fraction; diff --git a/src/gromacs/simd/impl_reference/impl_reference_simd_float.h b/src/gromacs/simd/impl_reference/impl_reference_simd_float.h index b0ed1105bc..aa740c7fec 100644 --- a/src/gromacs/simd/impl_reference/impl_reference_simd_float.h +++ b/src/gromacs/simd/impl_reference/impl_reference_simd_float.h @@ -807,10 +807,18 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat a) /*! \brief Extract (integer) exponent and fraction from single precision SIMD. * + * \tparam opt By default this function behaves like the standard + * library such that frexp(+-0,exp) returns +-0 and + * stores 0 in the exponent when value is 0. If you + * know the argument is always nonzero, you can set + * the template parameter to MathOptimization::Unsafe + * to make it slightly faster. + * * \param value Floating-point value to extract from * \param[out] exponent Returned exponent of value, integer SIMD format. * \return Fraction of value, floating-point SIMD format. */ +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { SimdFloat fraction; diff --git a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_double.h b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_double.h index 56253042a4..8689654bf7 100644 --- a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_double.h +++ b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_double.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -75,6 +75,7 @@ static inline SimdDBool gmx_simdcall testBits(SimdDouble a) return { _mm256_castsi256_pd(res) }; } +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { const __m256d exponentMask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FF0000000000000LL)); @@ -82,17 +83,27 @@ static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) const __m256i exponentBias = _mm256_set1_epi64x(1022LL); // add 1 to make our definition identical to frexp() const __m256d half = _mm256_set1_pd(0.5); - __m256i iExponent; - __m128i iExponent128; - iExponent = _mm256_castpd_si256(_mm256_and_pd(value.simdInternal_, exponentMask)); - iExponent = _mm256_sub_epi64(_mm256_srli_epi64(iExponent, 52), exponentBias); - iExponent = _mm256_shuffle_epi32(iExponent, _MM_SHUFFLE(3, 1, 2, 0)); + __m256i iExponent = _mm256_castpd_si256(_mm256_and_pd(value.simdInternal_, exponentMask)); + iExponent = _mm256_sub_epi64(_mm256_srli_epi64(iExponent, 52), exponentBias); + + __m256d result = _mm256_or_pd(_mm256_and_pd(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m256d valueIsZero = _mm256_cmp_pd(_mm256_setzero_pd(), value.simdInternal_, _CMP_EQ_OQ); + // Set the 64-bit-fields of "iExponent" to 0-bits if the corresponding input value was +-0.0 + iExponent = _mm256_andnot_si256(_mm256_castpd_si256(valueIsZero), iExponent); + // Set result to +-0 if the corresponding input value was +-0 + result = _mm256_blendv_pd(result, value.simdInternal_, valueIsZero); + } - iExponent128 = _mm256_extractf128_si256(iExponent, 1); + // Shuffle exponent so that 32-bit-fields 0 & 1 contain the relevant exponent values to return + iExponent = _mm256_shuffle_epi32(iExponent, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i iExponent128 = _mm256_extractf128_si256(iExponent, 1); exponent->simdInternal_ = _mm_unpacklo_epi64(_mm256_castsi256_si128(iExponent), iExponent128); - return { _mm256_or_pd(_mm256_and_pd(value.simdInternal_, mantissaMask), half) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_float.h b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_float.h index 76ecda4285..7c027b10af 100644 --- a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_float.h +++ b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256_simd_float.h @@ -89,18 +89,32 @@ static inline SimdFBool gmx_simdcall testBits(SimdFloat a) return { _mm256_castsi256_ps(res) }; } +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { const __m256 exponentMask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7F800000)); const __m256 mantissaMask = _mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)); const __m256i exponentBias = _mm256_set1_epi32(126); // add 1 to make our definition identical to frexp() const __m256 half = _mm256_set1_ps(0.5); - __m256i iExponent; - iExponent = _mm256_castps_si256(_mm256_and_ps(value.simdInternal_, exponentMask)); - exponent->simdInternal_ = _mm256_sub_epi32(_mm256_srli_epi32(iExponent, 23), exponentBias); + __m256i iExponent = _mm256_castps_si256(_mm256_and_ps(value.simdInternal_, exponentMask)); + iExponent = _mm256_sub_epi32(_mm256_srli_epi32(iExponent, 23), exponentBias); + + // Combine mantissa and exponent for result + __m256 result = _mm256_or_ps(_mm256_and_ps(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m256 valueIsZero = _mm256_cmp_ps(_mm256_setzero_ps(), value.simdInternal_, _CMP_EQ_OQ); + // If value was non-zero, use the exponent we calculated, otherwise set return-value exponent to zero. + iExponent = _mm256_andnot_si256(_mm256_castps_si256(valueIsZero), iExponent); + // set the result to zero for all elements where the input value was zero. + result = _mm256_blendv_ps(result, value.simdInternal_, valueIsZero); + } + + exponent->simdInternal_ = iExponent; - return { _mm256_or_ps(_mm256_and_ps(value.simdInternal_, mantissaMask), half) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_double.h b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_double.h index e0984d0d05..db17ac26bd 100644 --- a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_double.h +++ b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_double.h @@ -303,26 +303,47 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) // Override for AVX2 and higher #if GMX_SIMD_X86_AVX_256 +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { const __m256d exponentMask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FF0000000000000LL)); const __m256d mantissaMask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x800FFFFFFFFFFFFFLL)); const __m256d half = _mm256_set1_pd(0.5); const __m128i exponentBias = _mm_set1_epi32(1022); // add 1 to make our definition identical to frexp() - __m256i iExponent; - __m128i iExponentLow, iExponentHigh; - - iExponent = _mm256_castpd_si256(_mm256_and_pd(value.simdInternal_, exponentMask)); - iExponentHigh = _mm256_extractf128_si256(iExponent, 0x1); - iExponentLow = _mm256_castsi256_si128(iExponent); - iExponentLow = _mm_srli_epi64(iExponentLow, 52); - iExponentHigh = _mm_srli_epi64(iExponentHigh, 52); - iExponentLow = _mm_shuffle_epi32(iExponentLow, _MM_SHUFFLE(1, 1, 2, 0)); - iExponentHigh = _mm_shuffle_epi32(iExponentHigh, _MM_SHUFFLE(2, 0, 1, 1)); - iExponentLow = _mm_or_si128(iExponentLow, iExponentHigh); - exponent->simdInternal_ = _mm_sub_epi32(iExponentLow, exponentBias); - - return { _mm256_or_pd(_mm256_and_pd(value.simdInternal_, mantissaMask), half) }; + + __m256i iExponent = _mm256_castpd_si256(_mm256_and_pd(value.simdInternal_, exponentMask)); + __m128i iExponentHigh = _mm256_extractf128_si256(iExponent, 0x1); + __m128i iExponentLow = _mm256_castsi256_si128(iExponent); + iExponentLow = _mm_srli_epi64(iExponentLow, 52); + iExponentHigh = _mm_srli_epi64(iExponentHigh, 52); + iExponentLow = _mm_shuffle_epi32(iExponentLow, _MM_SHUFFLE(1, 1, 2, 0)); + iExponentHigh = _mm_shuffle_epi32(iExponentHigh, _MM_SHUFFLE(2, 0, 1, 1)); + // We need to store the return in a 128-bit integer variable, so reuse iExponentLow for both + iExponentLow = _mm_or_si128(iExponentLow, iExponentHigh); + iExponentLow = _mm_sub_epi32(iExponentLow, exponentBias); + + __m256d result = _mm256_or_pd(_mm256_and_pd(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m256d valueIsZero = _mm256_cmp_pd(_mm256_setzero_pd(), value.simdInternal_, _CMP_EQ_OQ); + // This looks more complex than it is: the valueIsZero variable contains 4x 64-bit double + // precision fields, but a bit below we'll need a corresponding integer variable with 4x + // 32-bit fields. Since AVX1 does not support shuffling across the upper/lower 128-bit + // lanes, we need to extract those first, and then shuffle between two 128-bit variables. + __m128i iValueIsZero = _mm_castps_si128(_mm_shuffle_ps( + _mm256_extractf128_ps(_mm256_castpd_ps(valueIsZero), 0x0), + _mm256_extractf128_ps(_mm256_castpd_ps(valueIsZero), 0x1), _MM_SHUFFLE(2, 0, 2, 0))); + + // Set exponent to 0 when input value was zero + iExponentLow = _mm_andnot_si128(iValueIsZero, iExponentLow); + + // Set result to +-0 if the corresponding input value was +-0 + result = _mm256_blendv_pd(result, value.simdInternal_, valueIsZero); + } + exponent->simdInternal_ = iExponentLow; + + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_float.h b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_float.h index 7f219d9937..8bccb00094 100644 --- a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_float.h +++ b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_simd_float.h @@ -285,26 +285,40 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) // Override for AVX2 and higher #if GMX_SIMD_X86_AVX_256 +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { const __m256 exponentMask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7F800000)); const __m256 mantissaMask = _mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)); const __m256 half = _mm256_set1_ps(0.5); const __m128i exponentBias = _mm_set1_epi32(126); // add 1 to make our definition identical to frexp() - __m256i iExponent; - __m128i iExponentLow, iExponentHigh; - - iExponent = _mm256_castps_si256(_mm256_and_ps(value.simdInternal_, exponentMask)); - iExponentHigh = _mm256_extractf128_si256(iExponent, 0x1); - iExponentLow = _mm256_castsi256_si128(iExponent); - iExponentLow = _mm_srli_epi32(iExponentLow, 23); - iExponentHigh = _mm_srli_epi32(iExponentHigh, 23); - iExponentLow = _mm_sub_epi32(iExponentLow, exponentBias); - iExponentHigh = _mm_sub_epi32(iExponentHigh, exponentBias); - iExponent = _mm256_castsi128_si256(iExponentLow); - exponent->simdInternal_ = _mm256_insertf128_si256(iExponent, iExponentHigh, 0x1); - - return { _mm256_or_ps(_mm256_and_ps(value.simdInternal_, mantissaMask), half) }; + + __m256i iExponent = _mm256_castps_si256(_mm256_and_ps(value.simdInternal_, exponentMask)); + __m128i iExponentHigh = _mm256_extractf128_si256(iExponent, 0x1); + __m128i iExponentLow = _mm256_castsi256_si128(iExponent); + iExponentLow = _mm_srli_epi32(iExponentLow, 23); + iExponentHigh = _mm_srli_epi32(iExponentHigh, 23); + iExponentLow = _mm_sub_epi32(iExponentLow, exponentBias); + iExponentHigh = _mm_sub_epi32(iExponentHigh, exponentBias); + iExponent = _mm256_castsi128_si256(iExponentLow); + iExponent = _mm256_insertf128_si256(iExponent, iExponentHigh, 0x1); + + __m256 result = _mm256_or_ps(_mm256_and_ps(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m256 valueIsZero = _mm256_cmp_ps(_mm256_setzero_ps(), value.simdInternal_, _CMP_EQ_OQ); + // Set the upper/lower 64-bit-fields of "iExponent" to 0-bits if the corresponding input value was +-0.0 + // ... but we need to do the actual andnot operation as float, since AVX1 does not support integer ops. + iExponent = _mm256_castps_si256(_mm256_andnot_ps(valueIsZero, _mm256_castsi256_ps(iExponent))); + + // Set result to +-0 if the corresponding input value was +-0 + result = _mm256_blendv_ps(result, value.simdInternal_, valueIsZero); + } + + exponent->simdInternal_ = iExponent; + + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_double.h b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_double.h index 6ef1b20feb..8dd20c2e6c 100644 --- a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_double.h +++ b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_double.h @@ -291,14 +291,46 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) #endif } +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { - __m512d rExponent = _mm512_getexp_pd(value.simdInternal_); - __m256i iExponent = _mm512_cvtpd_epi32(rExponent); + __m512d rExponent; + __m256i iExponent; + __m512d result; - exponent->simdInternal_ = _mm256_add_epi32(iExponent, _mm256_set1_epi32(1)); + if (opt == MathOptimization::Safe) + { + // For the safe branch, we use the masked operations to only assign results if the + // input value was nonzero, and otherwise set exponent to 0, and the fraction to the input (+-0). + __mmask8 valueIsNonZero = + _mm512_cmp_pd_mask(_mm512_setzero_pd(), value.simdInternal_, _CMP_NEQ_OQ); + rExponent = _mm512_maskz_getexp_pd(valueIsNonZero, value.simdInternal_); + + // AVX512F does not contain any function to use masking when adding 1 to a 256-bit register + // (that comes with AVX512VL), so to work around this we create an integer -1 value, and + // use masking in the _conversion_ instruction where it is supported. When we later add + // 1 to all fields, the files that were formerly -1 (corresponding to zero exponent) + // will be assigned -1 + 1 = 0. + iExponent = _mm512_mask_cvtpd_epi32(_mm256_set1_epi32(-1), valueIsNonZero, rExponent); + iExponent = _mm256_add_epi32(iExponent, _mm256_set1_epi32(1)); + + // Set result to value (+-0) when it is zero. + result = _mm512_mask_getmant_pd(value.simdInternal_, valueIsNonZero, value.simdInternal_, + _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } + else + { + // For the fast branch, it's the user's responsibility to make sure never to call the + // function with input values of +-0.0 + rExponent = _mm512_getexp_pd(value.simdInternal_); + iExponent = _mm512_cvtpd_epi32(rExponent); + iExponent = _mm256_add_epi32(iExponent, _mm256_set1_epi32(1)); + + result = _mm512_getmant_pd(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } - return { _mm512_getmant_pd(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src) }; + exponent->simdInternal_ = iExponent; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h index ee025bfa96..6ef6b774d3 100644 --- a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h +++ b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h @@ -291,14 +291,41 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) #endif } +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { - __m512 rExponent = _mm512_getexp_ps(value.simdInternal_); - __m512i iExponent = _mm512_cvtps_epi32(rExponent); + __m512 rExponent; + __m512i iExponent; + __m512 result; + + if (opt == MathOptimization::Safe) + { + // For the safe branch, we use the masked operations to only assign results if the + // input value was nonzero, and otherwise set exponent to 0, and the fraction to the input (+-0). + __mmask16 valueIsNonZero = + _mm512_cmp_ps_mask(_mm512_setzero_ps(), value.simdInternal_, _CMP_NEQ_OQ); + rExponent = _mm512_mask_getexp_ps(_mm512_setzero_ps(), valueIsNonZero, value.simdInternal_); + iExponent = _mm512_cvtps_epi32(rExponent); + iExponent = _mm512_mask_add_epi32(iExponent, valueIsNonZero, iExponent, _mm512_set1_epi32(1)); + + // Set result to input value when the latter is +-0 + result = _mm512_mask_getmant_ps(value.simdInternal_, valueIsNonZero, value.simdInternal_, + _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } + else + { + // For the fast branch, it's the user's responsibility to make sure never to call the + // function with input values of +-0.0 + rExponent = _mm512_getexp_ps(value.simdInternal_); + iExponent = _mm512_cvtps_epi32(rExponent); + iExponent = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + + result = _mm512_getmant_ps(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } - exponent->simdInternal_ = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + exponent->simdInternal_ = iExponent; - return { _mm512_getmant_ps(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_double.h b/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_double.h index d549bd83c8..7fe86f9726 100644 --- a/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_double.h +++ b/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_double.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -293,14 +293,43 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) return { _mm512_roundfxpnt_adjust_pd(x.simdInternal_, _MM_FROUND_TO_ZERO, _MM_EXPADJ_NONE) }; } +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { - __m512d rExponent = _mm512_getexp_pd(value.simdInternal_); - __m512i iExponent = _mm512_cvtfxpnt_roundpd_epi32lo(rExponent, _MM_FROUND_TO_NEAREST_INT); + __m512d rExponent; + __m512i iExponent; + __m512d result; + + if (opt == MathOptimization::Safe) + { + // For the safe branch, we use the masked operations to only assign results if the + // input value was nonzero, and otherwise set exponent to 0, and the fraction to the input (+-0). + __mmask8 valueIsNonZero = + _mm512_cmp_pd_mask(_mm512_setzero_pd(), value.simdInternal_, _CMP_NEQ_OQ); + rExponent = _mm512_mask_getexp_pd(_mm512_setzero_pd(), valueIsNonZero, value.simdInternal_); + + // Create an integer -1 value, and use masking in the conversion as the result for + // zero-value input. When we later add 1 to all fields, the fields that were formerly -1 + // (corresponding to zero exponent) will be assigned -1 + 1 = 0. + iExponent = _mm512_mask_cvtfxpnt_roundpd_epi32lo(_mm512_set_epi32(-1), valueIsNonZero, + rExponent, _MM_FROUND_TO_NEAREST_INT); + iExponent = _mm512__add_epi32(iExponent, _mm512_set1_epi32(1)); + + // Set result to value (+-0) when it is zero. + result = _mm512_mask_getmant_pd(value.simdInternal_, valueIsNonZero, value.simdInternal_, + _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } + else + { + rExponent = _mm512_getexp_pd(value.simdInternal_); + iExponent = _mm512_cvtfxpnt_roundpd_epi32lo(rExponent, _MM_FROUND_TO_NEAREST_INT); + iExponent = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + result = _mm512_getmant_pd(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } - exponent->simdInternal_ = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + exponent->simdInternal_ = iExponent; - return { _mm512_getmant_pd(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_float.h b/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_float.h index ec01923119..f1ab6da335 100644 --- a/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_float.h +++ b/src/gromacs/simd/impl_x86_mic/impl_x86_mic_simd_float.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -291,15 +291,40 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) return { _mm512_round_ps(x.simdInternal_, _MM_FROUND_TO_ZERO, _MM_EXPADJ_NONE) }; } +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { - __m512 rExponent = _mm512_getexp_ps(value.simdInternal_); - __m512i iExponent = - _mm512_cvtfxpnt_round_adjustps_epi32(rExponent, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE); + __m512 rExponent; + __m512i iExponent; + __m512 result; + + if (opt == MathOptimization::Safe) + { + // For the safe branch, we use the masked operations to only assign results if the + // input value was nonzero, and otherwise set exponent to 0, and the fraction to the input (+-0). + __mmask16 valueIsNonZero = + _mm512_cmp_ps_mask(_mm512_setzero_ps(), value.simdInternal_, _CMP_NEQ_OQ); + rExponent = _mm512_mask_getexp_ps(_mm512_setzero_ps(), valueIsNonZero, value.simdInternal_); + iExponent = _mm512_cvtfxpnt_round_adjustps_epi32(rExponent, _MM_FROUND_TO_NEAREST_INT, + _MM_EXPADJ_NONE); + iExponent = _mm512_mask_add_epi32(iExponent, valueIsNonZero, iExponent, _mm512_set1_epi32(1)); + + // Set result to input value when the latter is +-0 + result = _mm512_mask_getmant_ps(value.simdInternal_, valueIsNonZero, value.simdInternal_, + _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } + else + { + rExponent = _mm512_getexp_ps(value.simdInternal_); + iExponent = _mm512_cvtfxpnt_round_adjustps_epi32(rExponent, _MM_FROUND_TO_NEAREST_INT, + _MM_EXPADJ_NONE); + iExponent = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + result = _mm512_getmant_ps(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src); + } - exponent->simdInternal_ = _mm512_add_epi32(iExponent, _mm512_set1_epi32(1)); + exponent->simdInternal_ = iExponent; - return { _mm512_getmant_ps(value.simdInternal_, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src) }; + return { result }; } template diff --git a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_double.h b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_double.h index b81839eb5c..01b2ca2059 100644 --- a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_double.h +++ b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_double.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -309,6 +309,7 @@ static inline SimdDouble gmx_simdcall trunc(SimdDouble x) #endif +template static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) { // Don't use _mm_set1_epi64x() - on MSVC it is only supported for 64-bit builds @@ -318,14 +319,26 @@ static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent) _mm_castsi128_pd(_mm_set_epi32(0x800FFFFF, 0xFFFFFFFF, 0x800FFFFF, 0xFFFFFFFF)); const __m128i exponentBias = _mm_set1_epi32(1022); // add 1 to make our definition identical to frexp() const __m128d half = _mm_set1_pd(0.5); - __m128i iExponent; - iExponent = _mm_castpd_si128(_mm_and_pd(value.simdInternal_, exponentMask)); - iExponent = _mm_sub_epi32(_mm_srli_epi64(iExponent, 52), exponentBias); - iExponent = _mm_shuffle_epi32(iExponent, _MM_SHUFFLE(3, 1, 2, 0)); - exponent->simdInternal_ = iExponent; + __m128i iExponent = _mm_castpd_si128(_mm_and_pd(value.simdInternal_, exponentMask)); + iExponent = _mm_sub_epi32(_mm_srli_epi64(iExponent, 52), exponentBias); + + __m128d result = _mm_or_pd(_mm_and_pd(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m128d valueIsZero = _mm_cmpeq_pd(_mm_setzero_pd(), value.simdInternal_); + // Set the upper/lower 64-bit-fields of "iExponent" to 0-bits if the corresponding input value was +-0.0 + iExponent = _mm_andnot_si128(_mm_castpd_si128(valueIsZero), iExponent); + // Set result to +-0 if the corresponding input value was +-0 + result = _mm_or_pd(_mm_andnot_pd(valueIsZero, result), + _mm_and_pd(valueIsZero, value.simdInternal_)); + } + + // Shuffle exponent so that 32-bit-fields 0 & 1 contain the relevant exponent values to return + exponent->simdInternal_ = _mm_shuffle_epi32(iExponent, _MM_SHUFFLE(3, 1, 2, 0)); - return { _mm_or_pd(_mm_and_pd(value.simdInternal_, mantissaMask), half) }; + return { result }; } // Override for SSE4.1 diff --git a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_float.h b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_float.h index 37e78f88a0..7bc392c5e8 100644 --- a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_float.h +++ b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2_simd_float.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -304,19 +304,32 @@ static inline SimdFloat gmx_simdcall trunc(SimdFloat x) #endif +template static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent) { const __m128 exponentMask = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000)); const __m128 mantissaMask = _mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)); const __m128i exponentBias = _mm_set1_epi32(126); // add 1 to make our definition identical to frexp() const __m128 half = _mm_set1_ps(0.5F); - __m128i iExponent; - iExponent = _mm_castps_si128(_mm_and_ps(value.simdInternal_, exponentMask)); - iExponent = _mm_sub_epi32(_mm_srli_epi32(iExponent, 23), exponentBias); - exponent->simdInternal_ = iExponent; + __m128i iExponent = _mm_castps_si128(_mm_and_ps(value.simdInternal_, exponentMask)); + iExponent = _mm_sub_epi32(_mm_srli_epi32(iExponent, 23), exponentBias); + + // Combine mantissa and exponent for result + __m128 result = _mm_or_ps(_mm_and_ps(value.simdInternal_, mantissaMask), half); + + if (opt == MathOptimization::Safe) + { + __m128 valueIsZero = _mm_cmpeq_ps(_mm_setzero_ps(), value.simdInternal_); + // If value was non-zero, use the exponent we calculated, otherwise set return-value exponent to zero. + iExponent = _mm_andnot_si128(_mm_castps_si128(valueIsZero), iExponent); + // set the fraction result to zero for all elements where the input value was zero. + result = _mm_or_ps(_mm_andnot_ps(valueIsZero, result), + _mm_and_ps(valueIsZero, value.simdInternal_)); + } - return { _mm_or_ps(_mm_and_ps(value.simdInternal_, mantissaMask), half) }; + exponent->simdInternal_ = iExponent; + return { result }; } // Override for SSE4.1 diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h index 1c435b8abb..abd001c5f5 100644 --- a/src/gromacs/simd/simd_math.h +++ b/src/gromacs/simd/simd_math.h @@ -555,7 +555,8 @@ static inline SimdFloat gmx_simdcall log2(SimdFloat x) SimdFBool m; SimdFInt32 iExp; - x = frexp(x, &iExp); + // For the log2() function, the argument can never be 0, so use the faster version of frexp() + x = frexp(x, &iExp); fExp = cvtI2R(iExp); m = x < invsqrt2; @@ -597,7 +598,8 @@ static inline SimdFloat gmx_simdcall log(SimdFloat x) SimdFBool m; SimdFInt32 iExp; - x = frexp(x, &iExp); + // For log(), the argument cannot be 0, so use the faster version of frexp() + x = frexp(x, &iExp); fExp = cvtI2R(iExp); m = x < invsqrt2; @@ -2142,7 +2144,8 @@ static inline SimdDouble gmx_simdcall log2(SimdDouble x) SimdDBool m; SimdDInt32 iExp; - x = frexp(x, &iExp); + // For log2(), the argument cannot be 0, so use the faster version of frexp() + x = frexp(x, &iExp); fExp = cvtI2R(iExp); m = x < invsqrt2; @@ -2190,7 +2193,8 @@ static inline SimdDouble gmx_simdcall log(SimdDouble x) SimdDBool m; SimdDInt32 iExp; - x = frexp(x, &iExp); + // For log(), the argument cannot be 0, so use the faster version of frexp() + x = frexp(x, &iExp); fExp = cvtI2R(iExp); m = x < invsqrt2; @@ -3690,7 +3694,8 @@ static inline SimdDouble gmx_simdcall log2SingleAccuracy(SimdDouble x) SimdDInt32 iexp; SimdDBool mask; - x = frexp(x, &iexp); + // For log2(), the argument cannot be 0, so use the faster version of frexp + x = frexp(x, &iexp); fexp = cvtI2R(iexp); mask = (x < sqrt2); @@ -3734,7 +3739,8 @@ static inline SimdDouble gmx_simdcall logSingleAccuracy(SimdDouble x) SimdDInt32 iexp; SimdDBool mask; - x = frexp(x, &iexp); + // For log(), the argument cannot be 0, so use the faster version of frexp + x = frexp(x, &iexp); fexp = cvtI2R(iexp); mask = x < invsqrt2; diff --git a/src/gromacs/simd/tests/simd.cpp b/src/gromacs/simd/tests/simd.cpp index 70b425074e..c742ba1134 100644 --- a/src/gromacs/simd/tests/simd.cpp +++ b/src/gromacs/simd/tests/simd.cpp @@ -83,11 +83,13 @@ const SimdReal rSimd_m3p75 = setSimdRealFrom1R(-3.75); const SimdReal rSimd_Exp = setSimdRealFrom3R(1.4055235171027452623914516e+18, 5.3057102734253445623914516e-13, -2.1057102745623934534514516e+16); + # if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double -const SimdReal rSimd_ExpDouble = setSimdRealFrom3R(6.287393598732017379054414e+176, - 8.794495252903116023030553e-140, - -3.637060701570496477655022e+202); +const SimdReal rSimd_ExpDouble1 = + setSimdRealFrom3R(0.0, 8.794495252903116023030553e-140, -3.637060701570496477655022e+202); +const SimdReal rSimd_ExpDouble2 = + setSimdRealFrom3R(6.287393598732017379054414e+176, 0.0, -3.637060701570496477655022e+202); # endif // GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE # if GMX_SIMD_HAVE_LOGICAL diff --git a/src/gromacs/simd/tests/simd.h b/src/gromacs/simd/tests/simd.h index 41fd57415e..6e1d32dcce 100644 --- a/src/gromacs/simd/tests/simd.h +++ b/src/gromacs/simd/tests/simd.h @@ -137,7 +137,8 @@ extern const SimdReal rSimd_logicalResultAnd; //!< Result or bitwise 'and' of A # if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double -extern const SimdReal rSimd_ExpDouble; +extern const SimdReal rSimd_ExpDouble1; +extern const SimdReal rSimd_ExpDouble2; # endif // Magic FP numbers corresponding to specific bit patterns extern const SimdReal rSimd_Bits1; //!< Pattern F0 repeated to fill single/double. diff --git a/src/gromacs/simd/tests/simd_floatingpoint.cpp b/src/gromacs/simd/tests/simd_floatingpoint.cpp index 3dbcbc2028..28d07cb771 100644 --- a/src/gromacs/simd/tests/simd_floatingpoint.cpp +++ b/src/gromacs/simd/tests/simd_floatingpoint.cpp @@ -241,21 +241,46 @@ TEST_F(SimdFloatingpointTest, frexp) SimdReal fraction; SimdInt32 exponent; + fraction = frexp(rSimd_Exp, &exponent); + GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(0.609548660288905419513128, 0.5833690139241746175358116, + -0.584452007502232362412542), + fraction); + GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom3I(61, -40, 55), exponent); + // Test the unsafe flavor too, in case they use different branches + fraction = frexp(rSimd_Exp, &exponent); GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(0.609548660288905419513128, 0.5833690139241746175358116, -0.584452007502232362412542), fraction); GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom3I(61, -40, 55), exponent); + // Use ulp testing with 0 bit ulp tolerance for testing to separate 0.0 and -0.0 + setUlpTol(0); + fraction = frexp(setSimdRealFrom1R(0.0), &exponent); + GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), fraction); + GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom1I(0), exponent); -# if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE - fraction = frexp(rSimd_ExpDouble, &exponent); + // Second -0.0. + fraction = frexp(setSimdRealFrom1R(-0.0), &exponent); + GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(-0.0), fraction); + GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom1I(0), exponent); - GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(0.6206306194761728178832527, 0.5236473618795619566768096, - -0.9280331023751380303821179), - fraction); - GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom3I(588, -461, 673), exponent); + // Reset to default ulp tolerance + setUlpTol(defaultRealUlpTol()); + +# if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE + // Test exponents larger than what fit in single precision, as well as mixtures of 0 and non-zero values, to + // make sure the shuffling operations in the double-precision implementations don't do anything bad. + fraction = frexp(rSimd_ExpDouble1, &exponent); + GMX_EXPECT_SIMD_REAL_EQ( + setSimdRealFrom3R(0.0, 0.5236473618795619566768096, -0.9280331023751380303821179), fraction); + GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom3I(0, -461, 673), exponent); + + fraction = frexp(rSimd_ExpDouble2, &exponent); + GMX_EXPECT_SIMD_REAL_EQ( + setSimdRealFrom3R(0.6206306194761728178832527, 0.0, -0.9280331023751380303821179), fraction); + GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom3I(588, 0, 673), exponent); # endif } -- 2.11.4.GIT