From dd8ee44616308cbd3e3a0008844d839bbef2e5a5 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Sun, 5 Jul 2015 16:47:59 +0200 Subject: [PATCH] Moved GMX_SIMD_* macros from ifdefs to values 0/1 This will help catch bugs in SIMD-related files that use ifdef checks but where the symbol contains a typo, or if the correct file has not been included. Change-Id: I9353db28babc9e70e17c095889d9b0131ce07361 --- cmake/TestAVXMaskload.c | 2 +- cmake/gmxTestAVXMaskload.cmake | 6 +- docs/doxygen/lib/simd.md | 17 +- src/config.h.cmakein | 32 ++-- src/gromacs/ewald/pme-simd.h | 4 +- src/gromacs/ewald/pme-solve.cpp | 2 +- src/gromacs/gmxlib/gmx_cpuid.c | 28 +-- .../kernelutil_x86_avx_128_fma_single.h | 2 +- .../kernelutil_x86_avx_256_single.h | 2 +- src/gromacs/gmxlib/nonbonded/nonbonded.cpp | 55 +++--- src/gromacs/listed-forces/bonded.cpp | 16 +- src/gromacs/listed-forces/bonded.h | 2 +- src/gromacs/listed-forces/listed-forces.cpp | 16 +- src/gromacs/mdlib/clincs.cpp | 17 +- src/gromacs/mdlib/forcerec.cpp | 15 +- src/gromacs/mdlib/nbnxn_atomdata.cpp | 2 +- src/gromacs/mdlib/nbnxn_internal.h | 8 +- .../mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h | 12 +- .../nbnxn_kernel_simd_utils_x86_256s.h | 2 +- .../simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h | 1 - .../simd_4xn/nbnxn_kernel_simd_4xn_common.h | 6 +- src/gromacs/mdlib/nbnxn_simd.h | 11 +- src/gromacs/pbcutil/pbc-simd.cpp | 2 +- src/gromacs/pbcutil/pbc-simd.h | 4 +- src/gromacs/simd/impl_arm_neon/impl_arm_neon.h | 39 ++-- .../simd/impl_arm_neon_asimd/impl_arm_neon_asimd.h | 17 +- src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h | 37 ++-- src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx.h | 37 ++-- src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx.h | 43 +++-- src/gromacs/simd/impl_intel_mic/impl_intel_mic.h | 37 ++-- .../pme-simd.h => simd/impl_none/impl_none.h} | 56 +++--- src/gromacs/simd/impl_reference/impl_reference.h | 134 +++++++------- .../impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h | 37 ++-- .../simd/impl_x86_avx2_256/impl_x86_avx2_256.h | 13 +- .../impl_x86_avx_128_fma/impl_x86_avx_128_fma.h | 11 +- .../simd/impl_x86_avx_256/impl_x86_avx_256.h | 46 ++--- .../simd/impl_x86_avx_512f/impl_x86_avx_512f.h | 37 ++-- src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h | 40 ++-- src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h | 8 +- src/gromacs/simd/simd.h | 205 +++++++++------------ src/gromacs/simd/simd_math.h | 56 +++--- src/gromacs/simd/tests/bootstrap_loadstore.cpp | 30 +-- src/gromacs/simd/tests/simd.cpp | 14 +- src/gromacs/simd/tests/simd.h | 20 +- src/gromacs/simd/tests/simd4.cpp | 8 +- src/gromacs/simd/tests/simd4.h | 10 +- src/gromacs/simd/tests/simd4_floatingpoint.cpp | 8 +- src/gromacs/simd/tests/simd4_math.cpp | 6 +- src/gromacs/simd/tests/simd4_vector_operations.cpp | 8 +- src/gromacs/simd/tests/simd_floatingpoint.cpp | 14 +- src/gromacs/simd/tests/simd_integer.cpp | 18 +- src/gromacs/simd/tests/simd_math.cpp | 6 +- src/gromacs/simd/tests/simd_vector_operations.cpp | 8 +- src/gromacs/simd/vector_operations.h | 16 +- src/gromacs/utility/gmxomp.h | 12 +- 55 files changed, 673 insertions(+), 622 deletions(-) copy src/gromacs/{ewald/pme-simd.h => simd/impl_none/impl_none.h} (57%) diff --git a/cmake/TestAVXMaskload.c b/cmake/TestAVXMaskload.c index 41e571a219..e8438a1cc7 100644 --- a/cmake/TestAVXMaskload.c +++ b/cmake/TestAVXMaskload.c @@ -8,7 +8,7 @@ int main() a = _mm256_setzero_pd(); mask = _mm256_castpd_si256(a); -#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG a = _mm256_maskload_pd(d,_mm256_castsi256_pd(mask)); #else a = _mm256_maskload_pd(d,mask); diff --git a/cmake/gmxTestAVXMaskload.cmake b/cmake/gmxTestAVXMaskload.cmake index 028bb49ad9..a522d6c49b 100644 --- a/cmake/gmxTestAVXMaskload.cmake +++ b/cmake/gmxTestAVXMaskload.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -48,14 +48,14 @@ MACRO(GMX_TEST_AVX_GCC_MASKLOAD_BUG VARIABLE AVX_CFLAGS) # so first try a normal compile to avoid flagging those as buggy. TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}" "${CMAKE_SOURCE_DIR}/cmake/TestAVXMaskload.c" - COMPILE_DEFINITIONS "${AVX_CFLAGS}" ) + COMPILE_DEFINITIONS "${AVX_CFLAGS} -DGMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG=0" ) IF(${VARIABLE}_COMPILEOK) SET(${VARIABLE} 0 CACHE INTERNAL "Work around GCC bug in AVX maskload argument" FORCE) MESSAGE(STATUS "Checking for gcc AVX maskload bug - not present") ELSE() TRY_COMPILE(${VARIABLE}_COMPILEOK "${CMAKE_BINARY_DIR}" "${CMAKE_SOURCE_DIR}/cmake/TestAVXMaskload.c" - COMPILE_DEFINITIONS "${AVX_CFLAGS} -DGMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG" ) + COMPILE_DEFINITIONS "${AVX_CFLAGS} -DGMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG=1" ) IF(${VARIABLE}_COMPILEOK) SET(${VARIABLE} 1 CACHE INTERNAL "Work around GCC bug in AVX maskload argument" FORCE) MESSAGE(STATUS "Checking for gcc AVX maskload bug - found, will try to work around") diff --git a/docs/doxygen/lib/simd.md b/docs/doxygen/lib/simd.md index ee5bb056c1..4b66be5896 100644 --- a/docs/doxygen/lib/simd.md +++ b/docs/doxygen/lib/simd.md @@ -327,17 +327,20 @@ Predefined SIMD preprocessor macros Functionality-wise, we have a small set of core set of features that we require to be present on all platforms, while more avanced features can be -used in the code when defines like e.g. `GMX_SIMD_HAVE_LOADU` are set. +used in the code when defines like e.g. `GMX_SIMD_HAVE_LOADU` are set to 1. +To avoid bugs when we forget to include the SIMD header, we always define +these macros to either 1 or 0. Thus, it is important that you always +check the value rather than whether it is defined. This is a summary of the currently available preprocessor defines that you should use to check for support when using the corresponding features. We first list the float/double/int defines set by the _implementation_; in -most cases you do not want to check directly for float/double defines, but -you should instead use the derived "real" defines set in this file - we list +most cases you do not want to check directly for float/double define values, +but you should instead use the derived "real" defines set in this file - we list those at the end below. Preprocessor predefined macro defines set by the low-level implementation. -These are only set if they work for all datatypes; `GMX_SIMD_HAVE_LOADU` +These are only set to 1 if they work for all datatypes; `GMX_SIMD_HAVE_LOADU` thus means we can load both float, double, and integers from unaligned memory, and that the unaligned loads are available for SIMD4 too. @@ -350,10 +353,6 @@ Single-precision instructions available.
Double-precision instructions available.
-
`GMX_SIMD_HAVE_HARDWARE`
-
-Set when we are NOT emulating SIMD. -
`GMX_SIMD_HAVE_LOADU`
Load from unaligned memory available. @@ -412,7 +411,7 @@ Arithmetic ops for `gmx_simd_dint32_t`. There are also two macros specific to SIMD4: `GMX_SIMD4_HAVE_FLOAT` is set -if we can use SIMD4 in single precision, and `GMX_SIMD4_HAVE_DOUBLE` +to 1 if we can use SIMD4 in single precision, and `GMX_SIMD4_HAVE_DOUBLE` similarly denotes support for a double-precision SIMD4 implementation. For generic properties (e.g. whether SIMD4 FMA is supported), you should check the normal SIMD macros above. diff --git a/src/config.h.cmakein b/src/config.h.cmakein index 8cf1d6231b..82aa15d55d 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -69,52 +69,52 @@ #cmakedefine GMX_CYGWIN /* GCC bug in AVX maskload/maskstore arguments - worked around internally */ -#cmakedefine GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#cmakedefine01 GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG /* SSE2 was selected for SIMD instruction set level */ -#cmakedefine GMX_SIMD_X86_SSE2 +#cmakedefine01 GMX_SIMD_X86_SSE2 /* SSE4.1 was selected as SIMD instructions */ -#cmakedefine GMX_SIMD_X86_SSE4_1 +#cmakedefine01 GMX_SIMD_X86_SSE4_1 /* AVX 128-bit FMA was selected as SIMD instructions */ -#cmakedefine GMX_SIMD_X86_AVX_128_FMA +#cmakedefine01 GMX_SIMD_X86_AVX_128_FMA /* AVX 256-bit was selected as SIMD instructions */ -#cmakedefine GMX_SIMD_X86_AVX_256 +#cmakedefine01 GMX_SIMD_X86_AVX_256 /* AVX2 256-bit SIMD instruction set level was selected */ -#cmakedefine GMX_SIMD_X86_AVX2_256 +#cmakedefine01 GMX_SIMD_X86_AVX2_256 /* MIC (Xeon Phi) SIMD instruction set level was selected */ -#cmakedefine GMX_SIMD_X86_MIC +#cmakedefine01 GMX_SIMD_X86_MIC /* AVX-512F foundation level instruction SIMD */ -#cmakedefine GMX_SIMD_X86_AVX_512F +#cmakedefine01 GMX_SIMD_X86_AVX_512F /* AVX-512ER foundation level instruction SIMD */ -#cmakedefine GMX_SIMD_X86_AVX_512ER +#cmakedefine01 GMX_SIMD_X86_AVX_512ER /* 32-bit ARM NEON SIMD instruction set level was selected */ -#cmakedefine GMX_SIMD_ARM_NEON +#cmakedefine01 GMX_SIMD_ARM_NEON /* ARM (AArch64) NEON Advanced SIMD instruction set level was selected */ -#cmakedefine GMX_SIMD_ARM_NEON_ASIMD +#cmakedefine01 GMX_SIMD_ARM_NEON_ASIMD /* IBM QPX was selected as SIMD instructions (e.g. BlueGene/Q) */ -#cmakedefine GMX_SIMD_IBM_QPX +#cmakedefine01 GMX_SIMD_IBM_QPX /* IBM VMX was selected as SIMD instructions (Power 6 and later) */ -#cmakedefine GMX_SIMD_IBM_VMX +#cmakedefine01 GMX_SIMD_IBM_VMX /* IBM VSX was selected as SIMD instructions (Power 7 and later) */ -#cmakedefine GMX_SIMD_IBM_VSX +#cmakedefine01 GMX_SIMD_IBM_VSX /* Fujitsu Sparc64 HPC-ACE SIMD acceleration */ -#cmakedefine GMX_SIMD_SPARC64_HPC_ACE +#cmakedefine01 GMX_SIMD_SPARC64_HPC_ACE /* Reference SIMD implementation for testing */ -#cmakedefine GMX_SIMD_REFERENCE +#cmakedefine01 GMX_SIMD_REFERENCE /* String for SIMD instruction choice (for writing to log files and stdout) */ #define GMX_SIMD_STRING "@GMX_SIMD@" diff --git a/src/gromacs/ewald/pme-simd.h b/src/gromacs/ewald/pme-simd.h index 2b65de6d92..b605b437a3 100644 --- a/src/gromacs/ewald/pme-simd.h +++ b/src/gromacs/ewald/pme-simd.h @@ -39,13 +39,13 @@ #include "gromacs/simd/simd.h" /* Check if we have 4-wide SIMD macro support */ -#if (defined GMX_SIMD4_HAVE_REAL) +#if GMX_SIMD4_HAVE_REAL /* Do PME spread and gather with 4-wide SIMD. * NOTE: SIMD is only used with PME order 4 and 5 (which are the most common). */ # define PME_SIMD4_SPREAD_GATHER -# if (defined GMX_SIMD_HAVE_LOADU) && (defined GMX_SIMD_HAVE_STOREU) +# if GMX_SIMD_HAVE_LOADU && GMX_SIMD_HAVE_STOREU /* With PME-order=4 on x86, unaligned load+store is slightly faster * than doubling all SIMD operations when using aligned load+store. */ diff --git a/src/gromacs/ewald/pme-solve.cpp b/src/gromacs/ewald/pme-solve.cpp index af9975f903..9e04f1a144 100644 --- a/src/gromacs/ewald/pme-solve.cpp +++ b/src/gromacs/ewald/pme-solve.cpp @@ -50,7 +50,7 @@ #include "pme-internal.h" -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /* Turn on arbitrary width SIMD intrinsics for PME solve */ # define PME_SIMD_SOLVE #endif diff --git a/src/gromacs/gmxlib/gmx_cpuid.c b/src/gromacs/gmxlib/gmx_cpuid.c index 90e4ed15e1..844fc08906 100644 --- a/src/gromacs/gmxlib/gmx_cpuid.c +++ b/src/gromacs/gmxlib/gmx_cpuid.c @@ -255,33 +255,33 @@ gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid) /* What type of SIMD was compiled in, if any? */ -#ifdef GMX_SIMD_X86_AVX_512ER +#if GMX_SIMD_X86_AVX_512ER static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER; -#elif defined GMX_SIMD_X86_AVX_512F +#elif GMX_SIMD_X86_AVX_512F static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F; -#elif defined GMX_SIMD_X86_AVX2_256 +#elif GMX_SIMD_X86_AVX2_256 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256; -#elif defined GMX_SIMD_X86_AVX_256 +#elif GMX_SIMD_X86_AVX_256 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256; -#elif defined GMX_SIMD_X86_AVX_128_FMA +#elif GMX_SIMD_X86_AVX_128_FMA static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA; -#elif defined GMX_SIMD_X86_SSE4_1 +#elif GMX_SIMD_X86_SSE4_1 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1; -#elif defined GMX_SIMD_X86_SSE2 +#elif GMX_SIMD_X86_SSE2 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2; -#elif defined GMX_SIMD_ARM_NEON +#elif GMX_SIMD_ARM_NEON static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON; -#elif defined GMX_SIMD_ARM_NEON_ASIMD +#elif GMX_SIMD_ARM_NEON_ASIMD static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD; -#elif defined GMX_SIMD_SPARC64_HPC_ACE +#elif GMX_SIMD_SPARC64_HPC_ACE static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE; -#elif defined GMX_SIMD_IBM_QPX +#elif GMX_SIMD_IBM_QPX static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX; -#elif defined GMX_SIMD_IBM_VMX +#elif GMX_SIMD_IBM_VMX static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX; -#elif defined GMX_SIMD_IBM_VSX +#elif GMX_SIMD_IBM_VSX static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX; -#elif defined GMX_SIMD_REFERENCE +#elif GMX_SIMD_REFERENCE static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE; #else static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE; diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h index e41a05a68f..25a7c83cd1 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/kernelutil_x86_avx_128_fma_single.h @@ -50,7 +50,7 @@ #define gmx_mm_extract_epi32 _mm_extract_epi32 /* Work around gcc bug with wrong type for mask formal parameter to maskload/maskstore */ -#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_mm_maskload_ps(mem, mask) _mm_maskload_ps((mem), _mm_castsi128_ps(mask)) # define gmx_mm_maskstore_ps(mem, mask, x) _mm_maskstore_ps((mem), _mm_castsi128_ps(mask), (x)) # define gmx_mm256_maskload_ps(mem, mask) _mm256_maskload_ps((mem), _mm256_castsi256_ps(mask)) diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h index ed64fd4abf..bfbccd54de 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h +++ b/src/gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h @@ -60,7 +60,7 @@ gmx_mm256_set_m128(__m128 hi, __m128 lo) } /* Work around gcc bug with wrong type for mask formal parameter to maskload/maskstore */ -#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_mm_maskload_ps(mem, mask) _mm_maskload_ps((mem), _mm_castsi128_ps(mask)) # define gmx_mm_maskstore_ps(mem, mask, x) _mm_maskstore_ps((mem), _mm_castsi128_ps(mask), (x)) # define gmx_mm256_maskload_ps(mem, mask) _mm256_maskload_ps((mem), _mm256_castsi256_ps(mask)) diff --git a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp index 3bea5bf6a5..51b9377725 100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp @@ -66,6 +66,7 @@ #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/mshift.h" #include "gromacs/pbcutil/pbc.h" +#include "gromacs/simd/simd.h" #include "gromacs/tables/forcetable.h" #include "gromacs/utility/arraysize.h" #include "gromacs/utility/basedefinitions.h" @@ -76,31 +77,31 @@ /* Different default (c) and SIMD instructions interaction-specific kernels */ #include "gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h" -#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && !(defined GMX_DOUBLE) # include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h" #endif -#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && !(defined GMX_DOUBLE) # include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h" #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && !(defined GMX_DOUBLE) # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h" #endif -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && !(defined GMX_DOUBLE) # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h" #endif -#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h" #endif -#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h" #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h" #endif -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" #endif -#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) +#if GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" #endif @@ -126,32 +127,32 @@ gmx_nonbonded_setup(t_forcerec * fr, { /* Add interaction-specific kernels for different architectures */ /* Single precision */ -#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && !(defined GMX_DOUBLE) nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size); #endif -#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && !(defined GMX_DOUBLE) nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size); #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && !(defined GMX_DOUBLE) nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size); #endif -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && !(defined GMX_DOUBLE) nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size); #endif /* Double precision */ -#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size); #endif -#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size); #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size); #endif -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); #endif -#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) +#if GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size); #endif ; /* empty statement to avoid a completely empty block */ @@ -185,38 +186,38 @@ gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwS arch_and_padding[] = { /* Single precision */ -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && !(defined GMX_DOUBLE) { "avx_256_single", 8 }, #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && !(defined GMX_DOUBLE) { "avx_128_fma_single", 4 }, #endif -#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && !(defined GMX_DOUBLE) { "sse4_1_single", 4 }, #endif -#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && !(defined GMX_DOUBLE) { "sse2_single", 4 }, #endif /* Double precision */ -#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE { "avx_256_double", 4 }, #endif -#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) +#if GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE /* Sic. Double precision 2-way SIMD does not require neighbor list padding, * since the kernels execute a loop unrolled a factor 2, followed by * a possible single odd-element epilogue. */ { "avx_128_fma_double", 1 }, #endif -#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE /* No padding - see comment above */ { "sse2_double", 1 }, #endif -#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) +#if GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE /* No padding - see comment above */ { "sse4_1_double", 1 }, #endif -#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) +#if GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE /* No padding - see comment above */ { "sparc64_hpc_ace_double", 1 }, #endif diff --git a/src/gromacs/listed-forces/bonded.cpp b/src/gromacs/listed-forces/bonded.cpp index 7804b0c5ec..926035007d 100644 --- a/src/gromacs/listed-forces/bonded.cpp +++ b/src/gromacs/listed-forces/bonded.cpp @@ -72,7 +72,7 @@ #include "restcbt.h" -#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 // This was originally work-in-progress for augmenting the SIMD module with // masked load/store operations. Instead, that turned into and extended SIMD @@ -117,7 +117,7 @@ gmx_hack_simd4_transpose_to_simd_r(const gmx_simd4_double_t *a, gmx_hack_simd_transpose4_r(row0, row1, row2, row3); } -# ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +# if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_hack_simd4_load3_r(mem) _mm256_maskload_pd((mem), _mm_castsi128_ps(_mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1))) # else # define gmx_hack_simd4_load3_r(mem) _mm256_maskload_pd((mem), _mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1)) @@ -156,7 +156,7 @@ gmx_hack_simd4_transpose_to_simd_r(const gmx_simd4_float_t *a, gmx_hack_simd_transpose4_r(row0, row1, row2, row3); } -#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_hack_simd4_load3_r(mem) _mm_maskload_ps((mem), _mm_castsi256_pd(_mm_set_epi32(0, -1, -1, -1))) #else # define gmx_hack_simd4_load3_r(mem) _mm_maskload_ps((mem), _mm_set_epi32(0, -1, -1, -1)) @@ -168,7 +168,7 @@ gmx_hack_simd4_transpose_to_simd_r(const gmx_simd4_float_t *a, -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Store differences between indexed rvecs in SIMD registers. * * Returns SIMD register with the difference vectors: @@ -191,7 +191,7 @@ gmx_hack_simd_gather_rvec_dist_two_index(const rvec *v, gmx_simd_real_t *dy, gmx_simd_real_t *dz) { -#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 int i; gmx_simd4_real_t d[GMX_SIMD_REAL_WIDTH]; gmx_simd_real_t tmp; @@ -1119,7 +1119,7 @@ real angles(int nbonds, return vtot; } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /* As angles, but using SIMD to calculate many angles at once. * This routines does not calculate energies and shift forces. @@ -1573,7 +1573,7 @@ real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl, } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD, * also calculates the pre-factor required for the dihedral force update. @@ -2020,7 +2020,7 @@ pdihs_noener(int nbonds, } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */ void diff --git a/src/gromacs/listed-forces/bonded.h b/src/gromacs/listed-forces/bonded.h index d21e8ab2bb..9277101bd9 100644 --- a/src/gromacs/listed-forces/bonded.h +++ b/src/gromacs/listed-forces/bonded.h @@ -128,7 +128,7 @@ void const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd, int gmx_unused *global_atom_index); -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /* As angles(), but using SIMD to calculate many angles at once. * This routines does not calculate energies and shift forces. diff --git a/src/gromacs/listed-forces/listed-forces.cpp b/src/gromacs/listed-forces/listed-forces.cpp index 2504df8f77..ad4744489d 100644 --- a/src/gromacs/listed-forces/listed-forces.cpp +++ b/src/gromacs/listed-forces/listed-forces.cpp @@ -268,8 +268,14 @@ calc_one_bond(int thread, gmx_bool bCalcEnerVir, int *global_atom_index) { -#ifdef GMX_SIMD_HAVE_REAL - gmx_bool bUseSIMD = fr->use_simd_kernels; +#if GMX_SIMD_HAVE_REAL + gmx_bool bUseSIMD; + /* MSVC 2010 produces buggy SIMD PBC code, disable SIMD for MSVC <= 2010 */ +# if defined _MSC_VER && _MSC_VER < 1700 && !defined(__ICL) + bUseSIMD = FALSE; +# else + bUseSIMD = fr->use_simd_kernels; +# endif #endif int nat1, nbonds, efptFTYPE; @@ -307,7 +313,7 @@ calc_one_bond(int thread, pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index); } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL else if (ftype == F_ANGLES && bUseSIMD && !bCalcEnerVir && fr->efep == efepNO) { @@ -324,7 +330,7 @@ calc_one_bond(int thread, !bCalcEnerVir && fr->efep == efepNO) { /* No energies, shift forces, dvdl */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL if (bUseSIMD) { pdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0, @@ -344,7 +350,7 @@ calc_one_bond(int thread, } v = 0; } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL else if (ftype == F_RBDIHS && bUseSIMD && !bCalcEnerVir && fr->efep == efepNO) { diff --git a/src/gromacs/mdlib/clincs.cpp b/src/gromacs/mdlib/clincs.cpp index 5a6c4649fb..20dc51a02a 100644 --- a/src/gromacs/mdlib/clincs.cpp +++ b/src/gromacs/mdlib/clincs.cpp @@ -69,12 +69,13 @@ #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" -#if defined GMX_SIMD_HAVE_REAL -#define LINCS_SIMD +/* MSVC 2010 produces buggy SIMD PBC code, disable SIMD for MSVC <= 2010 */ +#if GMX_SIMD_HAVE_REAL && !(defined _MSC_VER && _MSC_VER < 1700) && !defined(__ICL) +# define LINCS_SIMD #endif -#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 // This was originally work-in-progress for augmenting the SIMD module with // masked load/store operations. Instead, that turned into and extended SIMD @@ -135,7 +136,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_double_t row0, } -# ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +# if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_hack_simd4_load3_r(mem) _mm256_maskload_pd((mem), _mm_castsi128_ps(_mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1))) # define gmx_hack_simd4_store3_r(mem, x) _mm256_maskstore_pd((mem), _mm_castsi128_ps(_mm256_set_epi32(0, 0, -1, -1, -1, -1, -1, -1)), (x)) # else @@ -195,7 +196,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t row0, a[6] = _mm256_extractf128_ps(row2, 1); a[7] = _mm256_extractf128_ps(row3, 1); } -#ifdef GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG +#if GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG # define gmx_hack_simd4_load3_r(mem) _mm_maskload_ps((mem), _mm_castsi256_pd(_mm_set_epi32(0, -1, -1, -1))) # define gmx_hack_simd4_store3_r(mem, x) _mm_maskstore_ps((mem), _mm_castsi256_pd(_mm_set_epi32(0, -1, -1, -1)), (x)) #else @@ -207,7 +208,7 @@ gmx_hack_simd_transpose_to_simd4_r(gmx_simd_float_t row0, #endif /* AVX */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Store differences between indexed rvecs in SIMD registers. * * Returns SIMD register with the difference vectors: @@ -228,7 +229,7 @@ gmx_hack_simd_gather_rvec_dist_pair_index(const rvec *v, gmx_simd_real_t *dy, gmx_simd_real_t *dz) { -#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 int i; gmx_simd4_real_t d[GMX_SIMD_REAL_WIDTH]; gmx_simd_real_t tmp; @@ -280,7 +281,7 @@ gmx_simd_store_vec_to_rvec(gmx_simd_real_t x, real gmx_unused *buf, rvec *v) { -#if defined(GMX_SIMD_X86_AVX_256) || defined(GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 int i; gmx_simd4_real_t s4[GMX_SIMD_REAL_WIDTH]; gmx_simd_real_t zero = gmx_simd_setzero_r(); diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 7b8c2baf52..297c96de14 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1627,7 +1627,7 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir, */ *kernel_type = nbnxnk4xN_SIMD_4xN; -#ifndef GMX_SIMD_HAVE_FMA +#if !GMX_SIMD_HAVE_FMA if (EEL_PME_EWALD(ir->coulombtype) || EVDW_PME(ir->vdwtype)) { @@ -1666,8 +1666,7 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir, * In single precision, this is faster on Bulldozer. */ #if GMX_SIMD_REAL_WIDTH >= 8 || \ - (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \ - defined GMX_SIMD_IBM_QPX + (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || GMX_SIMD_IBM_QPX *ewald_excl = ewaldexclAnalytical; #endif if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL) @@ -1698,15 +1697,15 @@ const char *lookup_nbnxn_kernel_name(int kernel_type) case nbnxnk4xN_SIMD_4xN: case nbnxnk4xN_SIMD_2xNN: #ifdef GMX_NBNXN_SIMD -#if defined GMX_SIMD_X86_SSE2 +#if GMX_SIMD_X86_SSE2 returnvalue = "SSE2"; -#elif defined GMX_SIMD_X86_SSE4_1 +#elif GMX_SIMD_X86_SSE4_1 returnvalue = "SSE4.1"; -#elif defined GMX_SIMD_X86_AVX_128_FMA +#elif GMX_SIMD_X86_AVX_128_FMA returnvalue = "AVX_128_FMA"; -#elif defined GMX_SIMD_X86_AVX_256 +#elif GMX_SIMD_X86_AVX_256 returnvalue = "AVX_256"; -#elif defined GMX_SIMD_X86_AVX2_256 +#elif GMX_SIMD_X86_AVX2_256 returnvalue = "AVX2_256"; #else returnvalue = "SIMD"; diff --git a/src/gromacs/mdlib/nbnxn_atomdata.cpp b/src/gromacs/mdlib/nbnxn_atomdata.cpp index af0195394d..894bcd9690 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.cpp +++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp @@ -499,7 +499,7 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) nbat->simd_exclusion_filter2[j*2 + 1] = (1U << j); } -#if (defined GMX_SIMD_IBM_QPX) +#if GMX_SIMD_IBM_QPX /* The QPX kernels shouldn't do the bit masking that is done on * x86, because the SIMD units lack bit-wise operations. Instead, * we generate a vector of all 2^4 possible ways an i atom diff --git a/src/gromacs/mdlib/nbnxn_internal.h b/src/gromacs/mdlib/nbnxn_internal.h index ac25b6ba81..8981c2a69c 100644 --- a/src/gromacs/mdlib/nbnxn_internal.h +++ b/src/gromacs/mdlib/nbnxn_internal.h @@ -63,13 +63,13 @@ extern "C" { * we only need to check for single precision support here. * This uses less (cache-)memory and SIMD is faster, at least on x86. */ -#ifdef GMX_SIMD4_HAVE_FLOAT -#define NBNXN_SEARCH_BB_SIMD4 +#if GMX_SIMD4_HAVE_FLOAT +# define NBNXN_SEARCH_BB_SIMD4 /* Memory alignment in bytes as required by SIMD aligned loads/stores */ -#define NBNXN_SEARCH_BB_MEM_ALIGN (GMX_SIMD4_WIDTH*sizeof(float)) +# define NBNXN_SEARCH_BB_MEM_ALIGN (GMX_SIMD4_WIDTH*sizeof(float)) #else /* No alignment required, but set it so we can call the same routines */ -#define NBNXN_SEARCH_BB_MEM_ALIGN 32 +# define NBNXN_SEARCH_BB_MEM_ALIGN 32 #endif diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h index 8bc432bfc5..0bcd4e32e8 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h @@ -57,7 +57,7 @@ #error "Must define an NBNxN kernel flavour before including NBNxN kernel utility functions" #endif -#ifdef GMX_SIMD_REFERENCE +#if GMX_SIMD_REFERENCE /* Align a stack-based thread-local working array. */ static gmx_inline int * @@ -70,7 +70,7 @@ prepare_table_load_buffer(const int gmx_unused *array) #else /* GMX_SIMD_REFERENCE */ -#if defined GMX_TARGET_X86 && !defined GMX_SIMD_X86_MIC +#if defined GMX_TARGET_X86 && !GMX_SIMD_X86_MIC /* Include x86 SSE2 compatible SIMD functions */ /* Set the stride for the lookup of the two LJ parameters from their @@ -125,11 +125,11 @@ static const int nbfp_stride = GMX_SIMD_REAL_WIDTH; #define TAB_FDV0 #endif -#ifdef GMX_SIMD_IBM_QPX +#if GMX_SIMD_IBM_QPX #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_ibm_qpx.h" #endif /* GMX_SIMD_IBM_QPX */ -#ifdef GMX_SIMD_X86_MIC +#if GMX_SIMD_X86_MIC #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_mic.h" #endif @@ -141,8 +141,8 @@ static const int nbfp_stride = GMX_SIMD_REAL_WIDTH; * reuse the simd real type and the four instructions we need. */ #if GMX_SIMD_REAL_WIDTH == 4 && \ - !((!defined GMX_DOUBLE && defined GMX_SIMD4_HAVE_FLOAT) || \ - (defined GMX_DOUBLE && defined GMX_SIMD4_HAVE_DOUBLE)) + !((!defined GMX_DOUBLE && GMX_SIMD4_HAVE_FLOAT) || \ + (defined GMX_DOUBLE && GMX_SIMD4_HAVE_DOUBLE)) #define gmx_simd4_real_t gmx_simd_real_t #define gmx_simd4_load_r gmx_simd_load_r #define gmx_simd4_store_r gmx_simd_store_r diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_256s.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_256s.h index 1764d22cf6..57771fe31f 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_256s.h +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_256s.h @@ -275,7 +275,7 @@ load_table_f_v(const real *tab_coul_FDV0, gmx_simd_int32_t ti_S, int *ti, *ctabv_S = gmx_2_mm_to_m256(ctabvt_S[0], ctabvt_S[1]); } -#ifdef GMX_SIMD_HAVE_FINT32_LOGICAL +#if GMX_SIMD_HAVE_FINT32_LOGICAL typedef gmx_simd_int32_t gmx_exclfilter; static const int filter_stride = GMX_SIMD_INT32_WIDTH/GMX_SIMD_REAL_WIDTH; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h index 0848a11e2f..37c79ac574 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h @@ -463,7 +463,6 @@ #endif frac_S0 = gmx_simd_sub_r(rs_S0, rf_S0); frac_S2 = gmx_simd_sub_r(rs_S2, rf_S2); - /* Load and interpolate table forces and possibly energies. * Force and energy can be combined in one table, stride 4: FDV0 * or in two separate tables with stride 1: F and V diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h index 70d6edff67..55146415a3 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -71,14 +71,14 @@ gmx_load_simd_4xn_interactions(int excl, gmx_simd_bool_t *interact_S2, gmx_simd_bool_t *interact_S3) { -#if defined GMX_SIMD_X86_SSE2_OR_HIGHER || defined GMX_SIMD_REFERENCE +#if GMX_SIMD_X86_SSE2_OR_HIGHER || GMX_SIMD_REFERENCE /* Load integer interaction mask */ gmx_exclfilter mask_pr_S = gmx_load1_exclfilter(excl); *interact_S0 = gmx_checkbitmask_pb(mask_pr_S, filter_S0); *interact_S1 = gmx_checkbitmask_pb(mask_pr_S, filter_S1); *interact_S2 = gmx_checkbitmask_pb(mask_pr_S, filter_S2); *interact_S3 = gmx_checkbitmask_pb(mask_pr_S, filter_S3); -#elif defined GMX_SIMD_IBM_QPX +#elif GMX_SIMD_IBM_QPX const int size = GMX_SIMD_REAL_WIDTH * sizeof(real); *interact_S0 = gmx_load_interaction_mask_pb(size*((excl >> (0 * UNROLLJ)) & 0xF), simd_interaction_array); *interact_S1 = gmx_load_interaction_mask_pb(size*((excl >> (1 * UNROLLJ)) & 0xF), simd_interaction_array); diff --git a/src/gromacs/mdlib/nbnxn_simd.h b/src/gromacs/mdlib/nbnxn_simd.h index a15a796201..bd7ed0d0b5 100644 --- a/src/gromacs/mdlib/nbnxn_simd.h +++ b/src/gromacs/mdlib/nbnxn_simd.h @@ -39,27 +39,24 @@ #include "config.h" #include "gromacs/legacyheaders/typedefs.h" - -/* Include SIMD, below we select kernels based on the SIMD width */ #include "gromacs/simd/simd.h" -#ifdef GMX_SIMD_REFERENCE +#if GMX_SIMD_REFERENCE #define GMX_NBNXN_SIMD #endif /* As we modularize the verlet kernels, we should remove stuff like this * that checks internal SIMD implementation details. */ -#if (defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \ - (defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \ - (defined GMX_SIMD_X86_AVX2_256) || (defined GMX_SIMD_IBM_QPX) +#if GMX_SIMD_X86_SSE2 || GMX_SIMD_X86_SSE4_1 || GMX_SIMD_X86_AVX_128_FMA || \ + GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 || GMX_SIMD_IBM_QPX /* Use SIMD accelerated nbnxn search and kernels */ #define GMX_NBNXN_SIMD #endif /* MIC for double is implemented in the SIMD module but so far missing in mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils_x86_mic.h */ -#if defined GMX_SIMD_X86_MIC && !defined GMX_DOUBLE +#if GMX_SIMD_X86_MIC && !defined GMX_DOUBLE #define GMX_NBNXN_SIMD #endif diff --git a/src/gromacs/pbcutil/pbc-simd.cpp b/src/gromacs/pbcutil/pbc-simd.cpp index 7ede573517..4126be91f5 100644 --- a/src/gromacs/pbcutil/pbc-simd.cpp +++ b/src/gromacs/pbcutil/pbc-simd.cpp @@ -51,7 +51,7 @@ void set_pbc_simd(const t_pbc gmx_unused *pbc, pbc_simd_t gmx_unused *pbc_simd) { -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL rvec inv_box_diag; int d; diff --git a/src/gromacs/pbcutil/pbc-simd.h b/src/gromacs/pbcutil/pbc-simd.h index a8f90af4c1..48196a2306 100644 --- a/src/gromacs/pbcutil/pbc-simd.h +++ b/src/gromacs/pbcutil/pbc-simd.h @@ -64,7 +64,7 @@ extern "C" { * This can avoid some ifdef'ing. */ typedef struct { -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL gmx_simd_real_t inv_bzz; /**< 1/box[ZZ][ZZ] */ gmx_simd_real_t inv_byy; /**< 1/box[YY][YY] */ gmx_simd_real_t inv_bxx; /**< 1/box[XX][XX] */ @@ -88,7 +88,7 @@ typedef struct { void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd); -#if defined GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Correct SIMD distance vector *dx,*dy,*dz for PBC using SIMD. * diff --git a/src/gromacs/simd/impl_arm_neon/impl_arm_neon.h b/src/gromacs/simd/impl_arm_neon/impl_arm_neon.h index 59d99d6300..289eb02da8 100644 --- a/src/gromacs/simd/impl_arm_neon/impl_arm_neon.h +++ b/src/gromacs/simd/impl_arm_neon/impl_arm_neon.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,30 +46,31 @@ */ /* Capability definitions for ARM 32-bit NEON */ -#define GMX_SIMD_HAVE_FLOAT -#undef GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS -#undef GMX_SIMD_HAVE_DINT32 -#undef GMX_SIMD_HAVE_DINT32_EXTRACT -#undef GMX_SIMD_HAVE_DINT32_LOGICAL -#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 0 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32 0 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 0 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 4 #undef GMX_SIMD_DOUBLE_WIDTH #define GMX_SIMD_FINT32_WIDTH 4 #undef GMX_SIMD_DINT32_WIDTH +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 8 #define GMX_SIMD_RCP_BITS 8 diff --git a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd.h b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd.h index 609354a723..700e87e7fc 100644 --- a/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd.h +++ b/src/gromacs/simd/impl_arm_neon_asimd/impl_arm_neon_asimd.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -49,11 +49,16 @@ #include "gromacs/simd/impl_arm_neon/impl_arm_neon.h" /* Override some capability definitions from ARM 32-bit NEON - we now have double */ -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_DINT32 -#define GMX_SIMD_HAVE_DINT32_EXTRACT -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS +#undef GMX_SIMD_HAVE_DOUBLE +#define GMX_SIMD_HAVE_DOUBLE 1 +#undef GMX_SIMD_HAVE_DINT32 +#define GMX_SIMD_HAVE_DINT32 1 +#undef GMX_SIMD_HAVE_DINT32_EXTRACT +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 +#undef GMX_SIMD_HAVE_DINT32_LOGICAL +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 /* Implementation details */ #define GMX_SIMD_DOUBLE_WIDTH 2 diff --git a/src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h b/src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h index bed7790e3b..ecf0d43b9f 100644 --- a/src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h +++ b/src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h @@ -49,30 +49,31 @@ * defines. */ /* Capability definitions for IBM QPX */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_HARDWARE -#undef GMX_SIMD_HAVE_STOREU -#undef GMX_SIMD_HAVE_STOREU -#undef GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#undef GMX_SIMD_HAVE_FINT32_EXTRACT -#undef GMX_SIMD_HAVE_FINT32_LOGICAL -#undef GMX_SIMD_HAVE_FINT32_ARITHMETICS -#define GMX_SIMD_HAVE_DINT32 -#undef GMX_SIMD_HAVE_DINT32_EXTRACT -#undef GMX_SIMD_HAVE_DINT32_LOGICAL -#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#define GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_STOREU 0 +#define GMX_SIMD_HAVE_STOREU 0 +#define GMX_SIMD_HAVE_LOGICAL 0 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 0 +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 1 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 4 #define GMX_SIMD_DOUBLE_WIDTH 4 #define GMX_SIMD_FINT32_WIDTH 4 #define GMX_SIMD_DINT32_WIDTH 4 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 14 #define GMX_SIMD_RCP_BITS 14 diff --git a/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx.h b/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx.h index 3f35a7fce8..919e437c79 100644 --- a/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx.h +++ b/src/gromacs/simd/impl_ibm_vmx/impl_ibm_vmx.h @@ -50,33 +50,34 @@ * defines. */ /* Capability definitions for IBM VMX */ -#define GMX_SIMD_HAVE_FLOAT -#undef GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_HARDWARE -#undef GMX_SIMD_HAVE_LOADU -#undef GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 0 +#define GMX_SIMD_HAVE_LOADU 0 +#define GMX_SIMD_HAVE_STOREU 0 +#define GMX_SIMD_HAVE_LOGICAL 1 /* VMX only provides fmadd/fnmadd (our definitions), but not fmsub/fnmsub. * However, fnmadd is what we need for 1/sqrt(x). */ -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#undef GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS -#undef GMX_SIMD_HAVE_DINT32 -#undef GMX_SIMD_HAVE_DINT32_EXTRACT -#undef GMX_SIMD_HAVE_DINT32_LOGICAL -#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32 0 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 0 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 4 #undef GMX_SIMD_DOUBLE_WIDTH #define GMX_SIMD_FINT32_WIDTH 4 #undef GMX_SIMD_DINT32_WIDTH +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 14 #define GMX_SIMD_RCP_BITS 14 diff --git a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx.h b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx.h index e553aa8522..4d9a98787e 100644 --- a/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx.h +++ b/src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx.h @@ -75,38 +75,45 @@ #endif /* Capability definitions for IBM VSX */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 /* With GCC, only version 4.9 or later supports all parts of double precision VSX. * We check explicitly for xlc, since that compiler appears to like pretending it is gcc, * but there double precision seems to work fine. */ #if defined(__ibmxl__) || defined(__xlC__) || !(defined(__GNUC__) && ((__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9)))) -# define GMX_SIMD_HAVE_DOUBLE -# define GMX_SIMD_HAVE_DINT32 -# define GMX_SIMD_HAVE_DINT32_EXTRACT -# define GMX_SIMD_HAVE_DINT32_LOGICAL -# define GMX_SIMD_HAVE_DINT32_ARITHMETICS +# define GMX_SIMD_HAVE_DOUBLE 1 +# define GMX_SIMD_HAVE_DINT32 1 +# define GMX_SIMD_HAVE_DINT32_EXTRACT 1 +# define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +# define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#else +# define GMX_SIMD_HAVE_DOUBLE 0 +# define GMX_SIMD_HAVE_DINT32 0 +# define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +# define GMX_SIMD_HAVE_DINT32_LOGICAL 0 +# define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 #endif -#define GMX_SIMD4_HAVE_FLOAT -#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 0 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 4 #define GMX_SIMD_DOUBLE_WIDTH 2 #define GMX_SIMD_FINT32_WIDTH 4 #define GMX_SIMD_DINT32_WIDTH 2 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 14 #define GMX_SIMD_RCP_BITS 14 diff --git a/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h b/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h index 0fd51fb399..d58f03b7fb 100644 --- a/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h +++ b/src/gromacs/simd/impl_intel_mic/impl_intel_mic.h @@ -51,30 +51,31 @@ */ /* Capability definitions for Xeon Phi SIMD */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_SIMD_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS -#define GMX_SIMD_HAVE_DINT32 -#define GMX_SIMD_HAVE_DINT32_EXTRACT -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#define GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 1 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 16 #define GMX_SIMD_DOUBLE_WIDTH 8 #define GMX_SIMD_FINT32_WIDTH 16 #define GMX_SIMD_DINT32_WIDTH 8 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 23 #define GMX_SIMD_RCP_BITS 23 diff --git a/src/gromacs/ewald/pme-simd.h b/src/gromacs/simd/impl_none/impl_none.h similarity index 57% copy from src/gromacs/ewald/pme-simd.h copy to src/gromacs/simd/impl_none/impl_none.h index 2b65de6d92..ceef6b47e6 100644 --- a/src/gromacs/ewald/pme-simd.h +++ b/src/gromacs/simd/impl_none/impl_none.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015, by the GROMACS development team, led by + * Copyright (c) 2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -32,32 +32,36 @@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifndef GMX_EWALD_PME_SIMD_H -#define GMX_EWALD_PME_SIMD_H -/* Include the SIMD macro file and then check for support */ -#include "gromacs/simd/simd.h" +#ifndef GMX_SIMD_IMPL_NONE_H +#define GMX_SIMD_IMPL_NONE_H -/* Check if we have 4-wide SIMD macro support */ -#if (defined GMX_SIMD4_HAVE_REAL) -/* Do PME spread and gather with 4-wide SIMD. - * NOTE: SIMD is only used with PME order 4 and 5 (which are the most common). - */ -# define PME_SIMD4_SPREAD_GATHER - -# if (defined GMX_SIMD_HAVE_LOADU) && (defined GMX_SIMD_HAVE_STOREU) -/* With PME-order=4 on x86, unaligned load+store is slightly faster - * than doubling all SIMD operations when using aligned load+store. - */ -# define PME_SIMD4_UNALIGNED -# endif -#endif +/* No SIMD implementation - assign 0 to all defines */ +#define GMX_SIMD 0 +#define GMX_SIMD_HAVE_FLOAT 0 +#define GMX_SIMD_HAVE_DOUBLE 0 +#define GMX_SIMD_HAVE_LOADU 0 +#define GMX_SIMD_HAVE_STOREU 0 +#define GMX_SIMD_HAVE_LOGICAL 0 +#define GMX_SIMD_HAVE_FMA 0 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 0 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 0 +#define GMX_SIMD_HAVE_DINT32 0 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 0 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 +#define GMX_SIMD4_HAVE_FLOAT 0 +#define GMX_SIMD4_HAVE_DOUBLE 0 -#ifdef PME_SIMD4_SPREAD_GATHER -# define SIMD4_ALIGNMENT (GMX_SIMD4_WIDTH*sizeof(real)) -#else -/* We can use any alignment, apart from 0, so we use 4 reals */ -# define SIMD4_ALIGNMENT (4*sizeof(real)) -#endif +#undef GMX_SIMD_FLOAT_WIDTH +#undef GMX_SIMD_DOUBLE_WIDTH +#undef GMX_SIMD_FINT32_WIDTH +#undef GMX_SIMD_DINT32_WIDTH +#undef GMX_SIMD4_WIDTH +#undef GMX_SIMD_RSQRT_BITS +#undef GMX_SIMD_RCP_BITS -#endif +#endif /* GMX_SIMD_IMPL_NONE_H */ diff --git a/src/gromacs/simd/impl_reference/impl_reference.h b/src/gromacs/simd/impl_reference/impl_reference.h index 5c05040413..f57cae5e51 100644 --- a/src/gromacs/simd/impl_reference/impl_reference.h +++ b/src/gromacs/simd/impl_reference/impl_reference.h @@ -59,51 +59,52 @@ */ /*! \brief - * Defined when SIMD float support is present. + * GMX_SIMD indicates that some sort of SIMD support is present in software. + * + * It is 0 if no architecture (not even reference SIMD) has been enabled. + */ +#define GMX_SIMD 1 + +/*! \brief + * 1 when SIMD float support is present, otherwise 0 * * You should only use this to specifically check for single precision SIMD, * support, even when the rest of Gromacs uses double precision. * \sa GMX_SIMD_HAVE_REAL, GMX_SIMD_HAVE_DOUBLE */ -#define GMX_SIMD_HAVE_FLOAT - -/*! \brief Defined if SIMD double support is present. */ -#define GMX_SIMD_HAVE_DOUBLE +#define GMX_SIMD_HAVE_FLOAT 1 -/*! \brief Defined if SIMD is implemented with real hardware instructions. */ -#define GMX_SIMD_HAVE_HARDWARE /* For Doxygen */ -#undef GMX_SIMD_HAVE_HARDWARE /* Reference implementation setting */ +/*! \brief 1 if SIMD double support is present, otherwise 0 */ +#define GMX_SIMD_HAVE_DOUBLE 1 -/*! \brief Defined if the SIMD implementation supports unaligned loads. */ -#define GMX_SIMD_HAVE_LOADU +/*! \brief 1 if the SIMD implementation supports unaligned loads, otherwise 0 */ +#define GMX_SIMD_HAVE_LOADU 1 -/*! \brief Defined if the SIMD implementation supports unaligned stores. */ -#define GMX_SIMD_HAVE_STOREU +/*! \brief 1 if the SIMD implementation supports unaligned stores, otherwise 0 */ +#define GMX_SIMD_HAVE_STOREU 1 -/*! \brief Defined if SIMD implementation has logical operations on floating-point data. */ -#define GMX_SIMD_HAVE_LOGICAL +/*! \brief 1 if SIMD impl has logical operations on floating-point data, otherwise 0 */ +#define GMX_SIMD_HAVE_LOGICAL 1 -/*! \brief Defined if SIMD fused multiply-add uses hardware instructions */ -#define GMX_SIMD_HAVE_FMA /* For Doxygen */ -#undef GMX_SIMD_HAVE_FMA /* Reference implementation setting */ +/*! \brief 1 if SIMD fused multiply-add uses hardware instructions, otherwise 0 */ +#define GMX_SIMD_HAVE_FMA 0 -/*! \brief Defined if the SIMD fraction has a direct hardware instruction. */ -#define GMX_SIMD_HAVE_FRACTION /* For Doxygen */ -#undef GMX_SIMD_HAVE_FRACTION /* Reference implementation setting */ +/*! \brief 1 if the SIMD fraction has a direct hardware instruction, otherwise 0 */ +#define GMX_SIMD_HAVE_FRACTION 0 -/*! \brief Defined if the SIMD implementation has \ref gmx_simd_fint32_t. */ -#define GMX_SIMD_HAVE_FINT32 +/*! \brief 1 if the SIMD implementation has \ref gmx_simd_fint32_t, otherwise 0 */ +#define GMX_SIMD_HAVE_FINT32 1 -/*! \brief Support for extracting integers from \ref gmx_simd_fint32_t. */ -#define GMX_SIMD_HAVE_FINT32_EXTRACT +/*! \brief Support for extracting integers from \ref gmx_simd_fint32_t (1/0 for present/absent) */ +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 -/*! \brief Defined if SIMD logical operations are supported for \ref gmx_simd_fint32_t */ -#define GMX_SIMD_HAVE_FINT32_LOGICAL +/*! \brief 1 if SIMD logical ops are supported for \ref gmx_simd_fint32_t, otherwise 0 */ +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 -/*! \brief Defined if SIMD arithmetic operations are supported for \ref gmx_simd_fint32_t */ -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS +/*! \brief 1 if SIMD arithmetic ops are supported for \ref gmx_simd_fint32_t, otherwise 0 */ +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 -/*! \brief Defined if the SIMD implementation has \ref gmx_simd_dint32_t. +/*! \brief 1 if the SIMD implementation has \ref gmx_simd_dint32_t, otherwise 0. * * \note The Gromacs SIMD module works entirely with 32 bit integers, both * in single and double precision, since some platforms do not support 64 bit @@ -124,22 +125,22 @@ * provide separate defines for the width of SIMD integer variables that you * should use. */ -#define GMX_SIMD_HAVE_DINT32 +#define GMX_SIMD_HAVE_DINT32 1 -/*! \brief Support for extracting integer from \ref gmx_simd_dint32_t */ -#define GMX_SIMD_HAVE_DINT32_EXTRACT +/*! \brief Support for extracting integer from \ref gmx_simd_dint32_t (1/0 for present/absent) */ +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 -/*! \brief Defined if logical operations are supported for \ref gmx_simd_dint32_t */ -#define GMX_SIMD_HAVE_DINT32_LOGICAL +/*! \brief 1 if logical operations are supported for \ref gmx_simd_dint32_t, otherwise 0 */ +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 -/*! \brief Defined if SIMD arithmetic operations are supported for \ref gmx_simd_dint32_t */ -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS +/*! \brief 1 if SIMD arithmetic ops are supported for \ref gmx_simd_dint32_t, otherwise 0 */ +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 -/*! \brief Defined if the implementation provides \ref gmx_simd4_float_t. */ -#define GMX_SIMD4_HAVE_FLOAT +/*! \brief 1 if implementation provides \ref gmx_simd4_float_t, otherwise 0 */ +#define GMX_SIMD4_HAVE_FLOAT 1 -/*! \brief Defined if the implementation provides \ref gmx_simd4_double_t. */ -#define GMX_SIMD4_HAVE_DOUBLE +/*! \brief 1 if the implementation provides \ref gmx_simd4_double_t, otherwise 0 */ +#define GMX_SIMD4_HAVE_DOUBLE 1 #ifdef GMX_SIMD_REF_FLOAT_WIDTH # define GMX_SIMD_FLOAT_WIDTH GMX_SIMD_REF_FLOAT_WIDTH @@ -161,6 +162,13 @@ /*! \brief Width of the \ref gmx_simd_dint32_t datatype. */ #define GMX_SIMD_DINT32_WIDTH GMX_SIMD_DOUBLE_WIDTH +/*! \brief + * SIMD4 width is always 4, but use this for clarity in definitions. + * + * It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8. + */ +#define GMX_SIMD4_WIDTH 4 + /*! \brief Accuracy of SIMD 1/sqrt(x) lookup. Used to determine number of iterations. */ #define GMX_SIMD_RSQRT_BITS 23 @@ -877,7 +885,7 @@ gmx_simd_mul_f(gmx_simd_float_t a, gmx_simd_float_t b) * * You should typically call the real-precision \ref gmx_simd_fmadd_r. * - * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction. + * If \ref GMX_SIMD_HAVE_FMA is 1 this is a single hardware instruction. * * \param a value * \param b value @@ -894,7 +902,7 @@ gmx_simd_mul_f(gmx_simd_float_t a, gmx_simd_float_t b) * * You should typically call the real-precision \ref gmx_simd_fmsub_r. * - * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction. + * If \ref GMX_SIMD_HAVE_FMA is 1 this is a single hardware instruction. * * \param a value * \param b value @@ -911,7 +919,7 @@ gmx_simd_mul_f(gmx_simd_float_t a, gmx_simd_float_t b) * * You should typically call the real-precision \ref gmx_simd_fnmadd_r. * - * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction. + * If \ref GMX_SIMD_HAVE_FMA is 1 this is a single hardware instruction. * * \param a value * \param b value @@ -928,7 +936,7 @@ gmx_simd_mul_f(gmx_simd_float_t a, gmx_simd_float_t b) * * You should typically call the real-precision \ref gmx_simd_fnmsub_r. * - * If \ref GMX_SIMD_HAVE_FMA is defined this is a single hardware instruction. + * If \ref GMX_SIMD_HAVE_FMA is 1 this is a single hardware instruction. * * \param a value * \param b value @@ -2113,7 +2121,7 @@ gmx_simd_reduce_d(gmx_simd_double_t a) * Logical shift. Each element is shifted (independently) up to 32 positions * left, while zeros are shifted in from the right. Only available if * \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) or \ref GMX_SIMD_HAVE_DINT32_LOGICAL - * (double) is defined. + * (double) is 1. * * \param a integer data to shift * \param n number of positions to shift left. n<=32. @@ -2139,7 +2147,7 @@ gmx_simd_slli_fi(gmx_simd_fint32_t a, int n) * Logical shift. Each element is shifted (independently) up to 32 positions * right, while zeros are shifted in from the left. Only available if * \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) or \ref GMX_SIMD_HAVE_DINT32_LOGICAL - * (double) is defined. + * (double) is 1. * * \param a integer data to shift * \param n number of positions to shift right. n<=32. @@ -2163,7 +2171,7 @@ gmx_simd_srli_fi(gmx_simd_fint32_t a, int n) * You should typically call the real-precision \ref gmx_simd_and_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) - * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is 1. * * \note You can \a not use this operation directly to select based on a boolean * SIMD variable, since booleans are separate from integer SIMD. If that @@ -2191,7 +2199,7 @@ gmx_simd_and_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_andnot_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) - * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is 1. * * Note that you can NOT use this operation directly to select based on a boolean * SIMD variable, since booleans are separate from integer SIMD. If that @@ -2219,7 +2227,7 @@ gmx_simd_andnot_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_or_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) - * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is 1. * * \param a first integer SIMD * \param b second integer SIMD @@ -2243,7 +2251,7 @@ gmx_simd_or_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_xor_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL (single) - * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_LOGICAL (double) is 1. * * \param a first integer SIMD * \param b second integer SIMD @@ -2272,7 +2280,7 @@ gmx_simd_xor_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_xor_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a term1 * \param b term2 @@ -2296,7 +2304,7 @@ gmx_simd_add_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_xor_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a term1 * \param b term2 @@ -2320,7 +2328,7 @@ gmx_simd_sub_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_xor_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a factor1 * \param b factor2 @@ -2352,7 +2360,7 @@ gmx_simd_mul_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_cmpeq_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD integer1 * \param b SIMD integer2 @@ -2376,7 +2384,7 @@ gmx_simd_cmpeq_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_cmplt_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD integer1 * \param b SIMD integer2 @@ -2400,7 +2408,7 @@ gmx_simd_cmplt_fi(gmx_simd_fint32_t a, gmx_simd_fint32_t b) * You should typically call the real-precision \ref gmx_simd_and_ib. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD boolean 1 * \param b SIMD boolean 2 @@ -2424,7 +2432,7 @@ gmx_simd_and_fib(gmx_simd_fibool_t a, gmx_simd_fibool_t b) * You should typically call the real-precision \ref gmx_simd_or_ib. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD boolean 1 * \param b SIMD boolean 2 @@ -2448,7 +2456,7 @@ gmx_simd_or_fib(gmx_simd_fibool_t a, gmx_simd_fibool_t b) * You should typically call the real-precision \ref gmx_simd_anytrue_ib. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * The actual return value for "any true" will depend on the architecture. * Any non-zero value should be considered truth. @@ -2475,7 +2483,7 @@ gmx_simd_anytrue_fib(gmx_simd_fibool_t a) * You should typically call the real-precision \ref gmx_simd_blendzero_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD integer to select from * \param sel Boolean selector @@ -2499,7 +2507,7 @@ gmx_simd_blendzero_fi(gmx_simd_fint32_t a, gmx_simd_fibool_t sel) * You should typically call the real-precision \ref gmx_simd_blendnotzero_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a SIMD integer to select from * \param sel Boolean selector @@ -2523,7 +2531,7 @@ gmx_simd_blendnotzero_fi(gmx_simd_fint32_t a, gmx_simd_fibool_t sel) * You should typically call the real-precision \ref gmx_simd_blendv_i. * * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single) - * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is defined. + * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1. * * \param a First source * \param b Second source @@ -3392,7 +3400,7 @@ gmx_simd4_dotproduct3_f(gmx_simd_float_t a, gmx_simd_float_t b) # define gmx_simd4_reduce_f gmx_simd_reduce_f #else /* GMX_SIMD_FLOAT_WIDTH!=4 */ -# undef GMX_SIMD4_HAVE_FLOAT +# define GMX_SIMD4_HAVE_FLOAT 0 #endif @@ -3599,7 +3607,7 @@ gmx_simd4_dotproduct3_d(gmx_simd_double_t a, gmx_simd_double_t b) # define gmx_simd4_reduce_d gmx_simd_reduce_d #else /* GMX_SIMD4_DOUBLE_WIDTH!=4 */ -# undef GMX_SIMD4_HAVE_DOUBLE +# define GMX_SIMD4_HAVE_DOUBLE 0 #endif /*! \} */ diff --git a/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h b/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h index e3f474c3bf..e873fe9336 100644 --- a/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h +++ b/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h @@ -69,30 +69,31 @@ * a single-precision interface where we only offer single-precision accuracy * in math functions - this can save quite a few cycles. */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_HARDWARE -#undef GMX_SIMD_HAVE_LOADU -#undef GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#undef GMX_SIMD_HAVE_FINT32_ARITHMETICS -#define GMX_SIMD_HAVE_DINT32 -#define GMX_SIMD_HAVE_DINT32_EXTRACT -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS -#undef GMX_SIMD4_HAVE_FLOAT -#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 0 +#define GMX_SIMD_HAVE_STOREU 0 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 0 +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 0 +#define GMX_SIMD4_HAVE_FLOAT 0 +#define GMX_SIMD4_HAVE_DOUBLE 0 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 2 #define GMX_SIMD_DOUBLE_WIDTH 2 #define GMX_SIMD_FINT32_WIDTH 2 #define GMX_SIMD_DINT32_WIDTH 2 +#undef GMX_SIMD4_WIDTH #define GMX_SIMD_RSQRT_BITS 10 #define GMX_SIMD_RCP_BITS 9 diff --git a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h index e19320add3..70eb7a4b8e 100644 --- a/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h +++ b/src/gromacs/simd/impl_x86_avx2_256/impl_x86_avx2_256.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -49,13 +49,14 @@ /* Inherit parts of AVX2_256 from AVX_256 */ #include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h" -/* Increment over AVX_256 capabilities */ -#define GMX_SIMD_X86_AVX2_256_OR_HIGHER /* Override some capability definitions for things added in AVX2 */ -#define GMX_SIMD_HAVE_FMA -#define GMX_SIMD_HAVE_FINT32_LOGICAL /* AVX2 adds 256-bit integer shifts */ -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS /* AVX2 adds 256-bit integer +,-,* */ +#undef GMX_SIMD_HAVE_FMA +#define GMX_SIMD_HAVE_FMA 1 +#undef GMX_SIMD_HAVE_FINT32_LOGICAL +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 /* AVX2 adds 256-bit integer shifts */ +#undef GMX_SIMD_HAVE_FINT32_ARITHMETICS +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 /* AVX2 adds 256-bit integer +,-,* */ /**************************************************** * SINGLE PRECISION SIMD IMPLEMENTATION * diff --git a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h index 1901b14576..508c9dbdfb 100644 --- a/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h +++ b/src/gromacs/simd/impl_x86_avx_128_fma/impl_x86_avx_128_fma.h @@ -50,13 +50,14 @@ /* Inherit parts of AVX_128_FMA from SSE4.1 */ #include "gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h" -/* Increment over SSE4.1 capabilities */ -#define GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER /* Override some capability definitions for things added in AVX over SSE4.1 */ -#define GMX_SIMD_HAVE_FMA -#define GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD4_HAVE_DOUBLE /* We can use 256-bit operations for this */ +#undef GMX_SIMD_HAVE_FMA +#define GMX_SIMD_HAVE_FMA 1 +#undef GMX_SIMD_HAVE_FRACTION +#define GMX_SIMD_HAVE_FRACTION 1 +#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD4_HAVE_DOUBLE 1 /* We can use 256-bit operations for this */ /* SINGLE */ #undef gmx_simd_fmadd_f diff --git a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h index 499882a105..c12691d4c6 100644 --- a/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h +++ b/src/gromacs/simd/impl_x86_avx_256/impl_x86_avx_256.h @@ -40,45 +40,37 @@ #include -/* It is cleaner to start the AVX implementation from scratch rather than - * first inheriting from SSE4.1, which in turn inherits from SSE2. However, - * the capabilities still form a superset. - */ -#define GMX_SIMD_X86_SSE2_OR_HIGHER -#define GMX_SIMD_X86_SSE4_1_OR_HIGHER -#define GMX_SIMD_X86_AVX_256_OR_HIGHER - - /* x86 256-bit AVX SIMD instruction wrappers * * Please see documentation in gromacs/simd/simd.h for defines. */ /* Capability definitions for 256-bit AVX - no inheritance from SSE */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_SIMD_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#undef GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT /* Emulated */ -#undef GMX_SIMD_HAVE_FINT32_LOGICAL /* AVX1 cannot do 256-bit int shifts */ -#undef GMX_SIMD_HAVE_FINT32_ARITHMETICS /* AVX1 cannot do 256-bit int +,-,* */ -#define GMX_SIMD_HAVE_DINT32 -#define GMX_SIMD_HAVE_DINT32_EXTRACT /* Native, dint uses 128-bit SIMD */ -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#define GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 0 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 /* Emulated */ +#define GMX_SIMD_HAVE_FINT32_LOGICAL 0 /* AVX1 cannot do 256-bit int shifts */ +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 0 /* AVX1 cannot do 256-bit int +,-,* */ +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 /* Native, dint uses 128-bit SIMD */ +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 1 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 8 #define GMX_SIMD_DOUBLE_WIDTH 4 #define GMX_SIMD_FINT32_WIDTH 8 #define GMX_SIMD_DINT32_WIDTH 4 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 11 #define GMX_SIMD_RCP_BITS 11 diff --git a/src/gromacs/simd/impl_x86_avx_512f/impl_x86_avx_512f.h b/src/gromacs/simd/impl_x86_avx_512f/impl_x86_avx_512f.h index 0f7b387dd7..014c7e1bc2 100644 --- a/src/gromacs/simd/impl_x86_avx_512f/impl_x86_avx_512f.h +++ b/src/gromacs/simd/impl_x86_avx_512f/impl_x86_avx_512f.h @@ -65,34 +65,35 @@ */ /* Capability definitions for AVX-512 SIMD. */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_SIMD_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#define GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 1 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 /* Technically it is straightforward to emulate extract on AVX-512F through * memory operations, but when applied to 16 elements as part of a table lookup * it will be faster to just store the entire vector once, so we avoid setting it. */ -#undef GMX_SIMD_HAVE_FINT32_EXTRACT -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS -#define GMX_SIMD_HAVE_DINT32 -#undef GMX_SIMD_HAVE_DINT32_EXTRACT -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#define GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD_HAVE_FINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 0 +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 1 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 16 #define GMX_SIMD_DOUBLE_WIDTH 8 #define GMX_SIMD_FINT32_WIDTH 16 #define GMX_SIMD_DINT32_WIDTH 8 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 14 #define GMX_SIMD_RCP_BITS 14 diff --git a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h index 3151d97b3f..ca6b3f1257 100644 --- a/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h +++ b/src/gromacs/simd/impl_x86_sse2/impl_x86_sse2.h @@ -42,39 +42,37 @@ #include -/* Set capabilities that can be inherited */ -#define GMX_SIMD_X86_SSE2_OR_HIGHER - /* x86 SSE2 SIMD instruction wrappers * * Please see documentation in gromacs/simd/simd.h for defines. */ /* Capability definitions for SSE2 */ -#define GMX_SIMD_HAVE_FLOAT -#define GMX_SIMD_HAVE_DOUBLE -#define GMX_SIMD_HAVE_HARDWARE -#define GMX_SIMD_HAVE_LOADU -#define GMX_SIMD_HAVE_STOREU -#define GMX_SIMD_HAVE_LOGICAL -#undef GMX_SIMD_HAVE_FMA -#undef GMX_SIMD_HAVE_FRACTION -#define GMX_SIMD_HAVE_FINT32 -#define GMX_SIMD_HAVE_FINT32_EXTRACT /* No SSE2 instruction, but use shifts */ -#define GMX_SIMD_HAVE_FINT32_LOGICAL -#define GMX_SIMD_HAVE_FINT32_ARITHMETICS -#define GMX_SIMD_HAVE_DINT32 -#define GMX_SIMD_HAVE_DINT32_EXTRACT /* No SSE2 instruction, but use shifts */ -#define GMX_SIMD_HAVE_DINT32_LOGICAL -#define GMX_SIMD_HAVE_DINT32_ARITHMETICS -#define GMX_SIMD4_HAVE_FLOAT -#undef GMX_SIMD4_HAVE_DOUBLE +#define GMX_SIMD 1 +#define GMX_SIMD_HAVE_FLOAT 1 +#define GMX_SIMD_HAVE_DOUBLE 1 +#define GMX_SIMD_HAVE_LOADU 1 +#define GMX_SIMD_HAVE_STOREU 1 +#define GMX_SIMD_HAVE_LOGICAL 1 +#define GMX_SIMD_HAVE_FMA 0 +#define GMX_SIMD_HAVE_FRACTION 0 +#define GMX_SIMD_HAVE_FINT32 1 +#define GMX_SIMD_HAVE_FINT32_EXTRACT 1 /* No SSE2 instruction, but use shifts */ +#define GMX_SIMD_HAVE_FINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_FINT32_ARITHMETICS 1 +#define GMX_SIMD_HAVE_DINT32 1 +#define GMX_SIMD_HAVE_DINT32_EXTRACT 1 /* No SSE2 instruction, but use shifts */ +#define GMX_SIMD_HAVE_DINT32_LOGICAL 1 +#define GMX_SIMD_HAVE_DINT32_ARITHMETICS 1 +#define GMX_SIMD4_HAVE_FLOAT 1 +#define GMX_SIMD4_HAVE_DOUBLE 0 /* Implementation details */ #define GMX_SIMD_FLOAT_WIDTH 4 #define GMX_SIMD_DOUBLE_WIDTH 2 #define GMX_SIMD_FINT32_WIDTH 4 #define GMX_SIMD_DINT32_WIDTH 2 +#define GMX_SIMD4_WIDTH 4 #define GMX_SIMD_RSQRT_BITS 11 #define GMX_SIMD_RCP_BITS 11 diff --git a/src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h b/src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h index 8b8384a835..7e09684130 100644 --- a/src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h +++ b/src/gromacs/simd/impl_x86_sse4_1/impl_x86_sse4_1.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,12 +50,6 @@ /* Inherit most of SSE4.1 from SSE2 */ #include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h" -/* Increment over SSE2 capabilities */ -#define GMX_SIMD_X86_SSE4_1_OR_HIGHER - - -/* Override capability definitions from SSE2 */ -#define GMX_SIMD4_HAVE_FLOAT_DOTPRODUCT3 /* Almost all SSE4.1 instructions already exist in SSE2, but a few of them * can be implemented more efficiently in SSE4.1. diff --git a/src/gromacs/simd/simd.h b/src/gromacs/simd/simd.h index 58069bd909..46f9a69a4c 100644 --- a/src/gromacs/simd/simd.h +++ b/src/gromacs/simd/simd.h @@ -97,48 +97,40 @@ static gmx_inline double * gmx_simd4_align_d(double *p); * \{ */ -/*! \brief - * GMX_SIMD indicates that some sort of SIMD support is present in software. - * - * It is disabled if no architecture, neither reference SIMD, has been selected. - */ -#define GMX_SIMD - - /* Intel MIC is a bit special since it is a co-processor. This means the rest * of GROMACS (which runs on the CPU) can use a default SIMD set like AVX. * All functions in this SIMD module are static, so it will work perfectly fine * to include this file with different SIMD definitions for different files. */ -#if defined GMX_SIMD_X86_AVX_512ER +#if GMX_SIMD_X86_AVX_512ER # include "impl_x86_avx_512er/impl_x86_avx_512er.h" -#elif defined GMX_SIMD_X86_AVX_512F +#elif GMX_SIMD_X86_AVX_512F # include "impl_x86_avx_512f/impl_x86_avx_512f.h" -#elif defined GMX_SIMD_X86_MIC +#elif GMX_SIMD_X86_MIC # include "impl_intel_mic/impl_intel_mic.h" -#elif defined GMX_SIMD_X86_AVX2_256 +#elif GMX_SIMD_X86_AVX2_256 # include "impl_x86_avx2_256/impl_x86_avx2_256.h" -#elif defined GMX_SIMD_X86_AVX_256 +#elif GMX_SIMD_X86_AVX_256 # include "impl_x86_avx_256/impl_x86_avx_256.h" -#elif defined GMX_SIMD_X86_AVX_128_FMA +#elif GMX_SIMD_X86_AVX_128_FMA # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h" -#elif defined GMX_SIMD_X86_SSE4_1 +#elif GMX_SIMD_X86_SSE4_1 # include "impl_x86_sse4_1/impl_x86_sse4_1.h" -#elif defined GMX_SIMD_X86_SSE2 +#elif GMX_SIMD_X86_SSE2 # include "impl_x86_sse2/impl_x86_sse2.h" -#elif defined GMX_SIMD_ARM_NEON +#elif GMX_SIMD_ARM_NEON # include "impl_arm_neon/impl_arm_neon.h" -#elif defined GMX_SIMD_ARM_NEON_ASIMD +#elif GMX_SIMD_ARM_NEON_ASIMD # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h" -#elif defined GMX_SIMD_IBM_QPX +#elif GMX_SIMD_IBM_QPX # include "impl_ibm_qpx/impl_ibm_qpx.h" -#elif defined GMX_SIMD_IBM_VMX +#elif GMX_SIMD_IBM_VMX # include "impl_ibm_vmx/impl_ibm_vmx.h" -#elif defined GMX_SIMD_IBM_VSX +#elif GMX_SIMD_IBM_VSX # include "impl_ibm_vsx/impl_ibm_vsx.h" -#elif defined GMX_SIMD_SPARC64_HPC_ACE +#elif GMX_SIMD_SPARC64_HPC_ACE # include "impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h" -#elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN) +#elif (GMX_SIMD_REFERENCE || defined DOXYGEN) /* Plain C SIMD reference implementation, also serves as documentation. * For now this code path will also be taken for Sparc64_HPC_ACE since we have * not yet added the verlet kernel extensions there. The group kernels do not @@ -146,16 +138,19 @@ static gmx_inline double * gmx_simd4_align_d(double *p); */ # include "impl_reference/impl_reference.h" #else -/* Turn off the GMX_SIMD flag if we do not even have reference support */ -# undef GMX_SIMD +# include "impl_none/impl_none.h" #endif -/*! \brief - * SIMD4 width is always 4, but use this for clarity in definitions. - * - * It improves code readability to allocate e.g. 2*GMX_SIMD4_WIDTH instead of 8. +/* These convenience macros are ugly hacks where some source files still make + * assumptions about the SIMD architecture. They will be removed as we implement + * the new verlet kernels, but for now we need them, and to make sure they + * always have values 0 or 1 we define them here rather than in the implementations. */ -#define GMX_SIMD4_WIDTH 4 +#define GMX_SIMD_X86_AVX2_256_OR_HIGHER (GMX_SIMD_X86_AVX2_256) +#define GMX_SIMD_X86_AVX_256_OR_HIGHER (GMX_SIMD_X86_AVX2_256_OR_HIGHER || GMX_SIMD_X86_AVX_256) +#define GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER (GMX_SIMD_X86_AVX_128_FMA) +#define GMX_SIMD_X86_SSE4_1_OR_HIGHER (GMX_SIMD_X86_AVX_256_OR_HIGHER || GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER || GMX_SIMD_X86_SSE4_1) +#define GMX_SIMD_X86_SSE2_OR_HIGHER (GMX_SIMD_X86_SSE4_1_OR_HIGHER || GMX_SIMD_X86_SSE2) /*! \} */ @@ -183,11 +178,11 @@ static gmx_inline double * gmx_simd4_align_d(double *p); static gmx_inline float * gmx_simd_align_f(float *p) { -# ifdef GMX_SIMD_HAVE_FLOAT +#if GMX_SIMD_HAVE_FLOAT return (float *)(((size_t)((p)+GMX_SIMD_FLOAT_WIDTH-1)) & (~((size_t)(GMX_SIMD_FLOAT_WIDTH*sizeof(float)-1)))); -# else +#else return p; -# endif +#endif } /*! \brief @@ -210,11 +205,11 @@ gmx_simd_align_f(float *p) static gmx_inline double * gmx_simd_align_d(double *p) { -# ifdef GMX_SIMD_HAVE_DOUBLE +#if GMX_SIMD_HAVE_DOUBLE return (double *)(((size_t)((p)+GMX_SIMD_DOUBLE_WIDTH-1)) & (~((size_t)(GMX_SIMD_DOUBLE_WIDTH*sizeof(double)-1)))); -# else +#else return p; -# endif +#endif } /*! \brief @@ -239,11 +234,11 @@ gmx_simd_align_d(double *p) static gmx_inline int * gmx_simd_align_fi(int *p) { -# ifdef GMX_SIMD_HAVE_FINT32 +#if GMX_SIMD_HAVE_FINT32 return (int *)(((size_t)((p)+GMX_SIMD_FINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_FINT32_WIDTH*sizeof(int)-1)))); -# else +#else return p; -# endif +#endif } /*! \brief @@ -268,11 +263,11 @@ gmx_simd_align_fi(int *p) static gmx_inline int * gmx_simd_align_di(int *p) { -# ifdef GMX_SIMD_HAVE_DINT32 +#if GMX_SIMD_HAVE_DINT32 return (int *)(((size_t)((p)+GMX_SIMD_DINT32_WIDTH-1)) & (~((size_t)(GMX_SIMD_DINT32_WIDTH*sizeof(int)-1)))); -# else +#else return p; -# endif +#endif } /*! \brief @@ -293,11 +288,11 @@ gmx_simd_align_di(int *p) static gmx_inline float * gmx_simd4_align_f(float *p) { -# ifdef GMX_SIMD4_HAVE_FLOAT +#if GMX_SIMD4_HAVE_FLOAT return (float *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(float)-1)))); -# else +#else return p; -# endif +#endif } /*! \brief @@ -318,11 +313,11 @@ gmx_simd4_align_f(float *p) static gmx_inline double * gmx_simd4_align_d(double *p) { -# ifdef GMX_SIMD4_HAVE_DOUBLE +#if GMX_SIMD4_HAVE_DOUBLE return (double *)(((size_t)((p)+GMX_SIMD4_WIDTH-1)) & (~((size_t)(GMX_SIMD4_WIDTH*sizeof(double)-1)))); -# else +#else return p; -# endif +#endif } /*! \} */ @@ -456,26 +451,14 @@ gmx_simd4_align_d(double *p) # define gmx_simd_align_i gmx_simd_align_di # define gmx_simd4_align_r gmx_simd4_align_d -# ifdef GMX_SIMD_HAVE_DOUBLE -# define GMX_SIMD_HAVE_REAL -# define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH -# endif -# ifdef GMX_SIMD_HAVE_DINT32 -# define GMX_SIMD_HAVE_INT32 -# define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH -# endif -# ifdef GMX_SIMD_HAVE_DINT32_EXTRACT -# define GMX_SIMD_HAVE_INT32_EXTRACT -# endif -# ifdef GMX_SIMD_HAVE_DINT32_LOGICAL -# define GMX_SIMD_HAVE_INT32_LOGICAL -# endif -# ifdef GMX_SIMD_HAVE_DINT32_ARITHMETICS -# define GMX_SIMD_HAVE_INT32_ARITHMETICS -# endif -# ifdef GMX_SIMD4_HAVE_DOUBLE -# define GMX_SIMD4_HAVE_REAL -# endif +# define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE +# define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH +# define GMX_SIMD_HAVE_INT32 GMX_SIMD_HAVE_DINT32 +# define GMX_SIMD_INT32_WIDTH GMX_SIMD_DINT32_WIDTH +# define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT +# define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL +# define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS +# define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE #else /* GMX_DOUBLE */ @@ -488,7 +471,7 @@ gmx_simd4_align_d(double *p) */ /*! \brief Real precision floating-point SIMD datatype. * - * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined. + * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1. * * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_double_t * internally, otherwise \ref gmx_simd_float_t. @@ -497,7 +480,7 @@ gmx_simd4_align_d(double *p) /*! \brief 32-bit integer SIMD type. * - * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined. + * This type is only available if \ref GMX_SIMD_HAVE_INT32 is 1. * * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dint32_t * internally, otherwise \ref gmx_simd_fint32_t. This might seem a strange @@ -510,7 +493,7 @@ gmx_simd4_align_d(double *p) /*! \brief Boolean SIMD type for usage with \ref gmx_simd_real_t. * - * This type is only available if \ref GMX_SIMD_HAVE_REAL is defined. + * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1. * * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dbool_t * internally, otherwise \ref gmx_simd_fbool_t. This is necessary since some @@ -524,7 +507,7 @@ gmx_simd4_align_d(double *p) /*! \brief Boolean SIMD type for usage with \ref gmx_simd_int32_t. * - * This type is only available if \ref GMX_SIMD_HAVE_INT32 is defined. + * This type is only available if \ref GMX_SIMD_HAVE_INT32 is 1. * * If GMX_DOUBLE is defined, this will be set to \ref gmx_simd_dibool_t * internally, otherwise \ref gmx_simd_fibool_t. This is necessary since some @@ -689,7 +672,7 @@ gmx_simd4_align_d(double *p) /*! \} * \name SIMD floating-point logical operations on gmx_simd_real_t * - * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is defined. + * These instructions are available if \ref GMX_SIMD_HAVE_LOGICAL is 1. * \{ */ @@ -883,7 +866,7 @@ gmx_simd4_align_d(double *p) * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_exponent_d, * otherwise \ref gmx_simd_get_exponent_f. * - * \copydetails gmx_simd_exponent_f + * \copydetails gmx_simd_get_exponent_f */ # define gmx_simd_get_exponent_r gmx_simd_get_exponent_f @@ -892,7 +875,7 @@ gmx_simd4_align_d(double *p) * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_get_mantissa_d, * otherwise \ref gmx_simd_get_mantissa_f. * - * \copydetails gmx_simd_mantissa_f + * \copydetails gmx_simd_get_mantissa_f */ # define gmx_simd_get_mantissa_r gmx_simd_get_mantissa_f @@ -951,7 +934,7 @@ gmx_simd4_align_d(double *p) * If GMX_DOUBLE is defined, this will be aliased to \ref gmx_simd_or_db, * otherwise \ref gmx_simd_or_fb. * - * \copydetails gmx_simd_or_fn + * \copydetails gmx_simd_or_fb */ # define gmx_simd_or_b gmx_simd_or_fb @@ -1005,7 +988,7 @@ gmx_simd4_align_d(double *p) /*! \} * \name SIMD integer logical operations on gmx_simd_int32_t * - * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is defined. + * These instructions are available if \ref GMX_SIMD_HAVE_INT32_LOGICAL is 1. * \{ */ @@ -1066,7 +1049,7 @@ gmx_simd4_align_d(double *p) /*! \} * \name SIMD integer arithmetic operations on gmx_simd_int32_t * - * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined. + * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1. * \{ */ @@ -1100,7 +1083,7 @@ gmx_simd4_align_d(double *p) /*! \} * \name SIMD integer comparison, booleans, and selection on gmx_simd_int32_t * - * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is defined. + * These instructions are available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1. * \{ */ @@ -1180,8 +1163,8 @@ gmx_simd4_align_d(double *p) * \name SIMD conversion operations * * These instructions are available when both types involved in the conversion - * are defined, e.g. \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32 - * for real-to-integer conversion. + * are defined, e.g. if \ref GMX_SIMD_HAVE_REAL and \ref GMX_SIMD_HAVE_INT32 + * are 1 for real-to-integer conversion. * \{ */ @@ -1238,7 +1221,7 @@ gmx_simd4_align_d(double *p) /*! \brief Align real memory for SIMD usage. * - * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is defined. + * This routine will only align memory if \ref GMX_SIMD_HAVE_REAL is 1. * Otherwise the original pointer will be returned. * * Start by allocating an extra \ref GMX_SIMD_REAL_WIDTH float elements of memory, @@ -1254,7 +1237,7 @@ gmx_simd4_align_d(double *p) /*! \brief Align integer memory for SIMD usage. * - * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is defined. + * This routine will only align memory if \ref GMX_SIMD_HAVE_INT32 is 1. * Otherwise the original pointer will be returned. * * Start by allocating an extra \ref GMX_SIMD_INT32_WIDTH elements of memory, @@ -1525,66 +1508,62 @@ gmx_simd4_align_d(double *p) * \{ */ -# if (defined GMX_SIMD_HAVE_FLOAT) || (defined DOXYGEN) -/*! \brief Defined if gmx_simd_real_t is available. +/*! \brief 1 if gmx_simd_real_t is available, otherwise 0. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD_HAVE_DOUBLE, otherwise GMX_SIMD_HAVE_FLOAT. */ -# define GMX_SIMD_HAVE_REAL +# define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT + /*! \brief Width of gmx_simd_real_t. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD_DOUBLE_WIDTH, otherwise GMX_SIMD_FLOAT_WIDTH. */ -# define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH -# endif -# if (defined GMX_SIMD_HAVE_FINT32) || (defined DOXYGEN) -/*! \brief Defined if gmx_simd_int32_t is available. +# define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH + +/*! \brief 1 if gmx_simd_int32_t is available, otherwise 0. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD_HAVE_DINT32, otherwise GMX_SIMD_HAVE_FINT32. */ -# define GMX_SIMD_HAVE_INT32 +# define GMX_SIMD_HAVE_INT32 GMX_SIMD_HAVE_FINT32 + /*! \brief Width of gmx_simd_int32_t. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD_DINT32_WIDTH, otherwise GMX_SIMD_FINT32_WIDTH. */ -# define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH -# endif -# if (defined GMX_SIMD_HAVE_FINT32_EXTRACT) || (defined DOXYGEN) -/*! \brief Defined if gmx_simd_extract_i() is available. +# define GMX_SIMD_INT32_WIDTH GMX_SIMD_FINT32_WIDTH + +/*! \brief 1 if gmx_simd_extract_i() is available, otherwise 0. * - * if GMX_DOUBLE is defined, this will be aliased to + * if GMX_DOUBLE is defined, this will correspond to * \ref GMX_SIMD_HAVE_DINT32_EXTRACT, otherwise GMX_SIMD_HAVE_FINT32_EXTRACT. */ -# define GMX_SIMD_HAVE_INT32_EXTRACT -# endif -# if (defined GMX_SIMD_HAVE_FINT32_LOGICAL) || (defined DOXYGEN) -/*! \brief Defined if logical ops are supported on gmx_simd_int32_t. +# define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT + +/*! \brief 1 if logical ops are supported on gmx_simd_int32_t, otherwise 0. * - * if GMX_DOUBLE is defined, this will be aliased to + * if GMX_DOUBLE is defined, this will correspond to * \ref GMX_SIMD_HAVE_DINT32_LOGICAL, otherwise GMX_SIMD_HAVE_FINT32_LOGICAL. */ -# define GMX_SIMD_HAVE_INT32_LOGICAL -# endif -# if (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) || (defined DOXYGEN) -/*! \brief Defined if arithmetic ops are supported on gmx_simd_int32_t. +# define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL + +/*! \brief 1 if arithmetic ops are supported on gmx_simd_int32_t, otherwise 0. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS, otherwise GMX_SIMD_HAVE_FINT32_ARITHMETICS. */ -# define GMX_SIMD_HAVE_INT32_ARITHMETICS -# endif -# if (defined GMX_SIMD4_HAVE_FLOAT) || (defined DOXYGEN) -/*! \brief Defined if gmx_simd4_real_t is available. +# define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS + +/*! \brief 1 if gmx_simd4_real_t is available, otherwise 0. * * if GMX_DOUBLE is defined, this will be aliased to * \ref GMX_SIMD4_HAVE_DOUBLE, otherwise GMX_SIMD4_HAVE_FLOAT. */ -# define GMX_SIMD4_HAVE_REAL -# endif +# define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT + /*! \} */ @@ -1611,8 +1590,8 @@ gmx_simd4_align_d(double *p) If there's ever other kinds of SIMD code that might have the same problem, we might want to add other variables here. */ -# define GMX_SIMD_HAVE_FLOAT -# define GMX_SIMD_HAVE_DOUBLE +# define GMX_SIMD_HAVE_FLOAT 1 +# define GMX_SIMD_HAVE_DOUBLE 1 #endif /* 0 */ diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h index 2daf49e33d..81f5099c6f 100644 --- a/src/gromacs/simd/simd_math.h +++ b/src/gromacs/simd/simd_math.h @@ -63,6 +63,8 @@ #include "gromacs/simd/simd.h" #include "gromacs/utility/real.h" +#if GMX_SIMD + /*! \cond libapi */ /*! \addtogroup module_simd */ /*! \{ */ @@ -73,7 +75,7 @@ /*! \} */ -#ifdef GMX_SIMD_HAVE_FLOAT +#if GMX_SIMD_HAVE_FLOAT /*! \name Single precision SIMD math functions * @@ -117,7 +119,7 @@ gmx_simd_sum4_f(gmx_simd_float_t a, gmx_simd_float_t b, static gmx_inline gmx_simd_float_t gmx_simdcall gmx_simd_xor_sign_f(gmx_simd_float_t a, gmx_simd_float_t b) { -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL return gmx_simd_xor_f(a, gmx_simd_and_f(gmx_simd_set1_f(GMX_FLOAT_NEGZERO), b)); #else return gmx_simd_blendv_f(a, gmx_simd_fneg_f(a), gmx_simd_cmplt_f(b, gmx_simd_setzero_f())); @@ -137,7 +139,7 @@ gmx_simd_xor_sign_f(gmx_simd_float_t a, gmx_simd_float_t b) static gmx_inline gmx_simd_float_t gmx_simdcall gmx_simd_rsqrt_iter_f(gmx_simd_float_t lu, gmx_simd_float_t x) { -# ifdef GMX_SIMD_HAVE_FMA +# if GMX_SIMD_HAVE_FMA return gmx_simd_fmadd_f(gmx_simd_fnmadd_f(x, gmx_simd_mul_f(lu, lu), gmx_simd_set1_f(1.0f)), gmx_simd_mul_f(lu, gmx_simd_set1_f(0.5f)), lu); # else return gmx_simd_mul_f(gmx_simd_set1_f(0.5f), gmx_simd_mul_f(gmx_simd_sub_f(gmx_simd_set1_f(3.0f), gmx_simd_mul_f(gmx_simd_mul_f(lu, lu), x)), lu)); @@ -649,7 +651,7 @@ gmx_simd_erfc_f(gmx_simd_float_t x) * fp numbers, and perform a logical or. Since the expression is constant, * we can at least hope it is evaluated at compile-time. */ -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL const gmx_simd_float_t sieve = gmx_simd_or_f(gmx_simd_set1_f(-5.965323564e+29f), gmx_simd_set1_f(7.05044434e-30f)); #else const int isieve = 0xFFFFF000; @@ -707,7 +709,7 @@ gmx_simd_erfc_f(gmx_simd_float_t x) * in double, but we still need memory as a backup when that is not available, * and this case is rare enough that we go directly there... */ -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL z = gmx_simd_and_f(y, sieve); #else gmx_simd_store_f(pmem, y); @@ -798,7 +800,7 @@ gmx_simd_sincos_f(gmx_simd_float_t x, gmx_simd_float_t *sinval, gmx_simd_float_t gmx_simd_float_t ssign, csign; gmx_simd_float_t x2, y, z, psin, pcos, sss, ccc; gmx_simd_fbool_t mask; -#if (defined GMX_SIMD_HAVE_FINT32) && (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_FINT32 && GMX_SIMD_HAVE_FINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL const gmx_simd_fint32_t ione = gmx_simd_set1_fi(1); const gmx_simd_fint32_t itwo = gmx_simd_set1_fi(2); gmx_simd_fint32_t iy; @@ -844,7 +846,7 @@ gmx_simd_sincos_f(gmx_simd_float_t x, gmx_simd_float_t *sinval, gmx_simd_float_t * two GMX_SIMD_HAVE_LOGICAL sections in this routine must either both be * active or inactive - you will get errors if only one is used. */ -# ifdef GMX_SIMD_HAVE_LOGICAL +# if GMX_SIMD_HAVE_LOGICAL ssign = gmx_simd_and_f(ssign, gmx_simd_set1_f(GMX_FLOAT_NEGZERO)); csign = gmx_simd_andnot_f(q, gmx_simd_set1_f(GMX_FLOAT_NEGZERO)); ssign = gmx_simd_xor_f(ssign, csign); @@ -880,7 +882,7 @@ gmx_simd_sincos_f(gmx_simd_float_t x, gmx_simd_float_t *sinval, gmx_simd_float_t sss = gmx_simd_blendv_f(pcos, psin, mask); ccc = gmx_simd_blendv_f(psin, pcos, mask); /* See comment for GMX_SIMD_HAVE_LOGICAL section above. */ -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL *sinval = gmx_simd_xor_f(sss, ssign); *cosval = gmx_simd_xor_f(ccc, csign); #else @@ -950,7 +952,7 @@ gmx_simd_tan_f(gmx_simd_float_t x) gmx_simd_float_t x2, p, y, z; gmx_simd_fbool_t mask; -#if (defined GMX_SIMD_HAVE_FINT32) && (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_FINT32 && GMX_SIMD_HAVE_FINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL gmx_simd_fint32_t iy; gmx_simd_fint32_t ione = gmx_simd_set1_fi(1); @@ -1381,7 +1383,7 @@ gmx_simd_pmecorrV_f(gmx_simd_float_t z2) /*! \} */ -#ifdef GMX_SIMD_HAVE_DOUBLE +#if GMX_SIMD_HAVE_DOUBLE /*! \name Double precision SIMD math functions * @@ -1419,7 +1421,7 @@ gmx_simd_sum4_d(gmx_simd_double_t a, gmx_simd_double_t b, static gmx_inline gmx_simd_double_t gmx_simdcall gmx_simd_xor_sign_d(gmx_simd_double_t a, gmx_simd_double_t b) { -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL return gmx_simd_xor_d(a, gmx_simd_and_d(gmx_simd_set1_d(GMX_DOUBLE_NEGZERO), b)); #else return gmx_simd_blendv_d(a, gmx_simd_fneg_d(a), gmx_simd_cmplt_d(b, gmx_simd_setzero_d())); @@ -1434,7 +1436,7 @@ gmx_simd_xor_sign_d(gmx_simd_double_t a, gmx_simd_double_t b) static gmx_inline gmx_simd_double_t gmx_simdcall gmx_simd_rsqrt_iter_d(gmx_simd_double_t lu, gmx_simd_double_t x) { -#ifdef GMX_SIMD_HAVE_FMA +#if GMX_SIMD_HAVE_FMA return gmx_simd_fmadd_d(gmx_simd_fnmadd_d(x, gmx_simd_mul_d(lu, lu), gmx_simd_set1_d(1.0)), gmx_simd_mul_d(lu, gmx_simd_set1_d(0.5)), lu); #else return gmx_simd_mul_d(gmx_simd_set1_d(0.5), gmx_simd_mul_d(gmx_simd_sub_d(gmx_simd_set1_d(3.0), gmx_simd_mul_d(gmx_simd_mul_d(lu, lu), x)), lu)); @@ -1501,7 +1503,7 @@ static gmx_inline void gmx_simdcall gmx_simd_invsqrt_pair_d(gmx_simd_double_t x0, gmx_simd_double_t x1, gmx_simd_double_t *out0, gmx_simd_double_t *out1) { -#if (defined GMX_SIMD_HAVE_FLOAT) && (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH) && (GMX_SIMD_RSQRT_BITS < 22) +#if GMX_SIMD_HAVE_FLOAT && (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH) && (GMX_SIMD_RSQRT_BITS < 22) gmx_simd_float_t xf = gmx_simd_cvt_dd2f(x0, x1); gmx_simd_float_t luf = gmx_simd_rsqrt_f(xf); gmx_simd_double_t lu0, lu1; @@ -2156,7 +2158,7 @@ gmx_simd_sincos_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_doubl gmx_simd_double_t ssign, csign; gmx_simd_double_t x2, y, z, psin, pcos, sss, ccc; gmx_simd_dbool_t mask; -#if (defined GMX_SIMD_HAVE_DINT32) && (defined GMX_SIMD_HAVE_DINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_DINT32 && GMX_SIMD_HAVE_DINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL const gmx_simd_dint32_t ione = gmx_simd_set1_di(1); const gmx_simd_dint32_t itwo = gmx_simd_set1_di(2); gmx_simd_dint32_t iy; @@ -2202,7 +2204,7 @@ gmx_simd_sincos_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_doubl * two GMX_SIMD_HAVE_LOGICAL sections in this routine must either both be * active or inactive - you will get errors if only one is used. */ -# ifdef GMX_SIMD_HAVE_LOGICAL +# if GMX_SIMD_HAVE_LOGICAL ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO)); csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(GMX_DOUBLE_NEGZERO)); ssign = gmx_simd_xor_d(ssign, csign); @@ -2243,7 +2245,7 @@ gmx_simd_sincos_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_doubl sss = gmx_simd_blendv_d(pcos, psin, mask); ccc = gmx_simd_blendv_d(psin, pcos, mask); /* See comment for GMX_SIMD_HAVE_LOGICAL section above. */ -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL *sinval = gmx_simd_xor_d(sss, ssign); *cosval = gmx_simd_xor_d(ccc, csign); #else @@ -2307,7 +2309,7 @@ gmx_simd_tan_d(gmx_simd_double_t x) gmx_simd_double_t x2, p, y, z; gmx_simd_dbool_t mask; -#if (defined GMX_SIMD_HAVE_DINT32) && (defined GMX_SIMD_HAVE_DINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_DINT32 && GMX_SIMD_HAVE_DINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL gmx_simd_dint32_t iy; gmx_simd_dint32_t ione = gmx_simd_set1_di(1); @@ -2848,7 +2850,7 @@ static gmx_inline void gmx_simdcall gmx_simd_invsqrt_pair_singleaccuracy_d(gmx_simd_double_t x0, gmx_simd_double_t x1, gmx_simd_double_t *out0, gmx_simd_double_t *out1) { -#if (defined GMX_SIMD_HAVE_FLOAT) && (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH) && (GMX_SIMD_RSQRT_BITS < 22) +#if GMX_SIMD_HAVE_FLOAT && (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH) && (GMX_SIMD_RSQRT_BITS < 22) gmx_simd_float_t xf = gmx_simd_cvt_dd2f(x0, x1); gmx_simd_float_t luf = gmx_simd_rsqrt_f(xf); gmx_simd_double_t lu0, lu1; @@ -3353,7 +3355,7 @@ gmx_simd_sincos_singleaccuracy_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, gmx_simd_double_t ssign, csign; gmx_simd_double_t x2, y, z, psin, pcos, sss, ccc; gmx_simd_dbool_t mask; -#if (defined GMX_SIMD_HAVE_FINT32) && (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_FINT32 && GMX_SIMD_HAVE_FINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL const gmx_simd_dint32_t ione = gmx_simd_set1_di(1); const gmx_simd_dint32_t itwo = gmx_simd_set1_di(2); gmx_simd_dint32_t iy; @@ -3399,7 +3401,7 @@ gmx_simd_sincos_singleaccuracy_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, * two GMX_SIMD_HAVE_LOGICAL sections in this routine must either both be * active or inactive - you will get errors if only one is used. */ -# ifdef GMX_SIMD_HAVE_LOGICAL +# if GMX_SIMD_HAVE_LOGICAL ssign = gmx_simd_and_d(ssign, gmx_simd_set1_d(-0.0)); csign = gmx_simd_andnot_d(q, gmx_simd_set1_d(-0.0)); ssign = gmx_simd_xor_d(ssign, csign); @@ -3433,7 +3435,7 @@ gmx_simd_sincos_singleaccuracy_d(gmx_simd_double_t x, gmx_simd_double_t *sinval, sss = gmx_simd_blendv_d(pcos, psin, mask); ccc = gmx_simd_blendv_d(psin, pcos, mask); /* See comment for GMX_SIMD_HAVE_LOGICAL section above. */ -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL *sinval = gmx_simd_xor_d(sss, ssign); *cosval = gmx_simd_xor_d(ccc, csign); #else @@ -3505,7 +3507,7 @@ gmx_simd_tan_singleaccuracy_d(gmx_simd_double_t x) gmx_simd_double_t x2, p, y, z; gmx_simd_dbool_t mask; -#if (defined GMX_SIMD_HAVE_FINT32) && (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) && (defined GMX_SIMD_HAVE_LOGICAL) +#if GMX_SIMD_HAVE_FINT32 && GMX_SIMD_HAVE_FINT32_ARITHMETICS && GMX_SIMD_HAVE_LOGICAL gmx_simd_dint32_t iy; gmx_simd_dint32_t ione = gmx_simd_set1_di(1); @@ -3939,7 +3941,7 @@ gmx_simd_pmecorrV_singleaccuracy_d(gmx_simd_double_t z2) */ -#ifdef GMX_SIMD4_HAVE_FLOAT +#if GMX_SIMD4_HAVE_FLOAT /************************************************************************* * SINGLE PRECISION SIMD4 MATH FUNCTIONS - JUST A SMALL SUBSET SUPPORTED * @@ -3963,7 +3965,7 @@ gmx_simd4_sum4_f(gmx_simd4_float_t a, gmx_simd4_float_t b, static gmx_inline gmx_simd4_float_t gmx_simdcall gmx_simd4_rsqrt_iter_f(gmx_simd4_float_t lu, gmx_simd4_float_t x) { -# ifdef GMX_SIMD_HAVE_FMA +# if GMX_SIMD_HAVE_FMA return gmx_simd4_fmadd_f(gmx_simd4_fnmadd_f(x, gmx_simd4_mul_f(lu, lu), gmx_simd4_set1_f(1.0f)), gmx_simd4_mul_f(lu, gmx_simd4_set1_f(0.5f)), lu); # else return gmx_simd4_mul_f(gmx_simd4_set1_f(0.5f), gmx_simd4_mul_f(gmx_simd4_sub_f(gmx_simd4_set1_f(3.0f), gmx_simd4_mul_f(gmx_simd4_mul_f(lu, lu), x)), lu)); @@ -3994,7 +3996,7 @@ gmx_simd4_invsqrt_f(gmx_simd4_float_t x) -#ifdef GMX_SIMD4_HAVE_DOUBLE +#if GMX_SIMD4_HAVE_DOUBLE /************************************************************************* * DOUBLE PRECISION SIMD4 MATH FUNCTIONS - JUST A SMALL SUBSET SUPPORTED * *************************************************************************/ @@ -4018,7 +4020,7 @@ gmx_simd4_sum4_d(gmx_simd4_double_t a, gmx_simd4_double_t b, static gmx_inline gmx_simd4_double_t gmx_simdcall gmx_simd4_rsqrt_iter_d(gmx_simd4_double_t lu, gmx_simd4_double_t x) { -#ifdef GMX_SIMD_HAVE_FMA +#if GMX_SIMD_HAVE_FMA return gmx_simd4_fmadd_d(gmx_simd4_fnmadd_d(x, gmx_simd4_mul_d(lu, lu), gmx_simd4_set1_d(1.0)), gmx_simd4_mul_d(lu, gmx_simd4_set1_d(0.5)), lu); #else return gmx_simd4_mul_d(gmx_simd4_set1_d(0.5), gmx_simd4_mul_d(gmx_simd4_sub_d(gmx_simd4_set1_d(3.0), gmx_simd4_mul_d(gmx_simd4_mul_d(lu, lu), x)), lu)); @@ -4498,4 +4500,6 @@ gmx_simd4_invsqrt_singleaccuracy_d(gmx_simd4_double_t x) /*! \} */ /*! \endcond */ +#endif /* GMX_SIMD */ + #endif /* GMX_SIMD_SIMD_MATH_H_ */ diff --git a/src/gromacs/simd/tests/bootstrap_loadstore.cpp b/src/gromacs/simd/tests/bootstrap_loadstore.cpp index 824a831e18..671f95f93f 100644 --- a/src/gromacs/simd/tests/bootstrap_loadstore.cpp +++ b/src/gromacs/simd/tests/bootstrap_loadstore.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -57,6 +57,8 @@ #include "gromacs/simd/simd.h" #include "gromacs/utility/real.h" +#if GMX_SIMD + namespace { @@ -66,14 +68,14 @@ namespace TEST(SimdBootstrapTest, gmxSimdAlign) { -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL real rdata[GMX_SIMD_REAL_WIDTH*2]; for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++) { EXPECT_EQ(((size_t)gmx_simd_align_r(&rdata[i]) & (GMX_SIMD_REAL_WIDTH*sizeof(real)-1)), (size_t)0); } #endif -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 int idata[GMX_SIMD_INT32_WIDTH*2]; for (int i = 0; i < GMX_SIMD_INT32_WIDTH; i++) { @@ -131,7 +133,7 @@ simdLoadStoreTester(TSimd simdLoadFn(T* mem), void simdStoreFn(T* mem, TSimd), } } -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL //! Wrapper for SIMD macro to load aligned floating-point data. gmx_simd_real_t wrapperSimdLoadR(real *m) { @@ -148,7 +150,7 @@ TEST(SimdBootstrapTest, gmxSimdLoadStoreR) simdLoadStoreTester(wrapperSimdLoadR, wrapperSimdStoreR, gmx_simd_align_r, 0, 0, GMX_SIMD_REAL_WIDTH); } -# ifdef GMX_SIMD_HAVE_LOADU +# if GMX_SIMD_HAVE_LOADU //! Wrapper for SIMD macro to load unaligned floating-point data. gmx_simd_real_t WrapperSimdLoadUR(real *m) { @@ -164,7 +166,7 @@ TEST(SimdBootstrapTest, gmxSimdLoadUR) } # endif -# ifdef GMX_SIMD_HAVE_STOREU +# if GMX_SIMD_HAVE_STOREU //! Wrapper for SIMD macro to store to unaligned floating-point data. void WrapperSimdStoreUR(real *m, gmx_simd_real_t s) { @@ -181,7 +183,7 @@ TEST(SimdBootstrapTest, gmxSimdStoreUR) # endif #endif -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 // Tests for gmx_simd_int32_t load & store operations //! Wrapper for SIMD macro to load aligned integer data. @@ -200,7 +202,7 @@ TEST(SimdBootstrapTest, gmxSimdLoadStoreI) simdLoadStoreTester(wrapperSimdLoadI, wrapperSimdStoreI, gmx_simd_align_i, 0, 0, GMX_SIMD_INT32_WIDTH); } -# ifdef GMX_SIMD_HAVE_LOADU +# if GMX_SIMD_HAVE_LOADU //! Wrapper for SIMD macro to load unaligned integer data. gmx_simd_int32_t wrapperSimdLoadUI(int *m) { @@ -216,7 +218,7 @@ TEST(SimdBootstrapTest, gmxSimdLoadUI) } # endif -# ifdef GMX_SIMD_HAVE_STOREU +# if GMX_SIMD_HAVE_STOREU //! Wrapper for SIMD macro to store to unaligned integer data. void wrapperSimdStoreUI(int *m, gmx_simd_int32_t s) { @@ -233,7 +235,7 @@ TEST(SimdBootstrapTest, gmxSimdStoreUI) # endif #endif -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL /* Tests for gmx_simd4_real_t load & store operations. Define wrapper functions * for the SIMD instructions that are typically implemented as macros. */ @@ -303,7 +305,7 @@ TEST(SimdBootstrapTest, gmxSimd4LoadStoreR) simd4LoadStoreTester(wrapperSimd4LoadR, wrapperSimd4StoreR, gmx_simd4_align_r, 0, 0); } -# ifdef GMX_SIMD_HAVE_LOADU +# if GMX_SIMD_HAVE_LOADU //! Wrapper for SIMD4 macro to load unaligned floating-point data. gmx_simd4_real_t WrapperSimd4LoadUR(real *m) { @@ -319,7 +321,7 @@ TEST(SimdBootstrapTest, gmxSimd4LoadUR) } # endif -# ifdef GMX_SIMD_HAVE_STOREU +# if GMX_SIMD_HAVE_STOREU //! Wrapper for SIMD4 macro to store to unaligned floating-point data. void WrapperSimd4StoreUR(real *m, gmx_simd4_real_t s) { @@ -339,4 +341,6 @@ TEST(SimdBootstrapTest, gmxSimd4StoreUR) /*! \} */ /*! \endcond */ -} // namespace +} // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd.cpp b/src/gromacs/simd/tests/simd.cpp index 88ee93a87b..b699fa7c34 100644 --- a/src/gromacs/simd/tests/simd.cpp +++ b/src/gromacs/simd/tests/simd.cpp @@ -38,6 +38,8 @@ #include "gromacs/simd/simd.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -60,7 +62,7 @@ namespace test * occasionally have many digits that need to be exactly right, and keeping * them in a single place makes sure they are consistent. */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL const gmx_simd_real_t rSimd_1_2_3 = setSimdRealFrom3R(1, 2, 3); const gmx_simd_real_t rSimd_4_5_6 = setSimdRealFrom3R(4, 5, 6); const gmx_simd_real_t rSimd_7_8_9 = setSimdRealFrom3R(7, 8, 9); @@ -75,14 +77,14 @@ const gmx_simd_real_t rSimd_m3p75 = setSimdRealFrom1R(-3.75); const gmx_simd_real_t rSimd_Exp = setSimdRealFrom3R( 1.4055235171027452623914516e+18, 5.3057102734253445623914516e-13, -2.1057102745623934534514516e+16); -# if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +# if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double const gmx_simd_real_t rSimd_ExpDouble = setSimdRealFrom3R( 6.287393598732017379054414e+176, 8.794495252903116023030553e-140, -3.637060701570496477655022e+202); # endif #endif // GMX_SIMD_HAVE_REAL -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 const gmx_simd_int32_t iSimd_1_2_3 = setSimdIntFrom3I(1, 2, 3); const gmx_simd_int32_t iSimd_4_5_6 = setSimdIntFrom3I(4, 5, 6); const gmx_simd_int32_t iSimd_7_8_9 = setSimdIntFrom3I(7, 8, 9); @@ -94,7 +96,7 @@ const gmx_simd_int32_t iSimd_0xF0F0F0F0 = setSimdIntFrom1I(0xF0F0F0F0); const gmx_simd_int32_t iSimd_0xCCCCCCCC = setSimdIntFrom1I(0xCCCCCCCC); #endif // GMX_SIMD_HAVE_INT32 -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL ::std::vector simdReal2Vector(const gmx_simd_real_t simd) { @@ -157,7 +159,7 @@ SimdTest::compareSimdRealEq(const char * refExpr, const char * tstExpr, #endif // GMX_SIMD_HAVE_REAL -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 std::vector simdInt2Vector(const gmx_simd_int32_t simd) { @@ -218,3 +220,5 @@ SimdTest::compareSimdInt32(const char * refExpr, const char * tstExpr, } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd.h b/src/gromacs/simd/tests/simd.h index 26cdf9e74b..4f92a61edf 100644 --- a/src/gromacs/simd/tests/simd.h +++ b/src/gromacs/simd/tests/simd.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -82,6 +82,8 @@ #include "base.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -104,7 +106,7 @@ namespace test * occasionally have many digits that need to be exactly right, and keeping * them in a single place makes sure they are consistent. */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL extern const gmx_simd_real_t rSimd_1_2_3; //!< Generic (different) fp values. extern const gmx_simd_real_t rSimd_4_5_6; //!< Generic (different) fp values. extern const gmx_simd_real_t rSimd_7_8_9; //!< Generic (different) fp values. @@ -118,7 +120,7 @@ extern const gmx_simd_real_t rSimd_m2p25; //!< Negative value that rounds up extern const gmx_simd_real_t rSimd_m3p75; //!< Negative value that rounds down. //! Three large floating-point values whose exponents are >32. extern const gmx_simd_real_t rSimd_Exp; -# if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +# if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double extern const gmx_simd_real_t rSimd_ExpDouble; # endif @@ -130,7 +132,7 @@ extern const gmx_simd_real_t rSimd_Bits4; //!< Pattern 0C repeated to fill extern const gmx_simd_real_t rSimd_Bits5; //!< Pattern FC repeated to fill single/double. extern const gmx_simd_real_t rSimd_Bits6; //!< Pattern 3C repeated to fill single/double. #endif // GMX_SIMD_HAVE_REAL -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 extern const gmx_simd_int32_t iSimd_1_2_3; //!< Three generic ints. extern const gmx_simd_int32_t iSimd_4_5_6; //!< Three generic ints. extern const gmx_simd_int32_t iSimd_7_8_9; //!< Three generic ints. @@ -154,7 +156,7 @@ extern const gmx_simd_int32_t iSimd_0xCCCCCCCC; //!< Bitpattern to test integer class SimdTest : public SimdBaseTest { public: -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Compare two real SIMD variables for approximate equality. * * This is an internal implementation routine. YOu should always use @@ -189,7 +191,7 @@ class SimdTest : public SimdBaseTest #endif -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 /*! \brief Compare two 32-bit integer SIMD variables. * * This is an internal implementation routine. YOu should always use @@ -208,7 +210,7 @@ class SimdTest : public SimdBaseTest #endif }; -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Convert SIMD real to std::vector. * * The returned vector will have the same length as the SIMD width. @@ -246,7 +248,7 @@ gmx_simd_real_t setSimdRealFrom1R(real value); #endif // GMX_SIMD_HAVE_REAL -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 /*! \brief Convert SIMD integer to std::vector. * * The returned vector will have the same length as the SIMD width. @@ -291,4 +293,6 @@ gmx_simd_int32_t setSimdIntFrom1I(int value); } // namespace } // namespace +#endif // GMX_SIMD + #endif // GMX_SIMD_TESTS_SIMD_H diff --git a/src/gromacs/simd/tests/simd4.cpp b/src/gromacs/simd/tests/simd4.cpp index bb124336eb..090ff143eb 100644 --- a/src/gromacs/simd/tests/simd4.cpp +++ b/src/gromacs/simd/tests/simd4.cpp @@ -38,6 +38,8 @@ #include "gromacs/simd/simd.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -47,7 +49,7 @@ namespace test /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL const gmx_simd4_real_t rSimd4_1_2_3 = setSimd4RealFrom3R(1, 2, 3); const gmx_simd4_real_t rSimd4_4_5_6 = setSimd4RealFrom3R(4, 5, 6); @@ -63,7 +65,7 @@ const gmx_simd4_real_t rSimd4_m3p75 = setSimd4RealFrom1R(-3.75); const gmx_simd4_real_t rSimd4_Exp = setSimd4RealFrom3R( 1.4055235171027452623914516e+18, 5.3057102734253445623914516e-13, -2.1057102745623934534514516e+16); -# if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +# if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double const gmx_simd4_real_t rSimd_ExpDouble = setSimd4RealFrom3R( 6.287393598732017379054414e+176, 8.794495252903116023030553e-140, @@ -137,3 +139,5 @@ Simd4Test::compareSimd4RealEq(const char * refExpr, const char * tstExpr, } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd4.h b/src/gromacs/simd/tests/simd4.h index 83cb794a74..b0a5e83859 100644 --- a/src/gromacs/simd/tests/simd4.h +++ b/src/gromacs/simd/tests/simd4.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -55,6 +55,8 @@ #include "base.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -64,7 +66,7 @@ namespace test /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL extern const gmx_simd4_real_t rSimd4_1_2_3; //!< Generic (different) fp values. extern const gmx_simd4_real_t rSimd4_4_5_6; //!< Generic (different) fp values. extern const gmx_simd4_real_t rSimd4_7_8_9; //!< Generic (different) fp values. @@ -78,7 +80,7 @@ extern const gmx_simd4_real_t rSimd4_m2p25; //!< Negative value that rounds extern const gmx_simd4_real_t rSimd4_m3p75; //!< Negative value that rounds down. //! Three large floating-point values whose exponents are >32. extern const gmx_simd4_real_t rSimd4_Exp; -# if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +# if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE // Make sure we also test exponents outside single precision when we use double extern const gmx_simd4_real_t rSimd4_ExpDouble; # endif @@ -176,4 +178,6 @@ gmx_simd4_real_t setSimd4RealFrom1R(real value); } // namespace } // namespace +#endif // GMX_SIMD + #endif // GMX_SIMD_TESTS_SIMD4_H diff --git a/src/gromacs/simd/tests/simd4_floatingpoint.cpp b/src/gromacs/simd/tests/simd4_floatingpoint.cpp index 03b914d2e4..ca8c78dcb2 100644 --- a/src/gromacs/simd/tests/simd4_floatingpoint.cpp +++ b/src/gromacs/simd/tests/simd4_floatingpoint.cpp @@ -41,6 +41,8 @@ #include "simd4.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -52,7 +54,7 @@ namespace /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL /*! \brief Test fixture for SIMD4 floating-point operations (identical to the SIMD4 \ref Simd4Test) */ typedef Simd4Test Simd4FloatingpointTest; @@ -120,7 +122,7 @@ TEST_F(Simd4FloatingpointTest, gmxSimd4FnegR) GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_1_2_3, gmx_simd4_fneg_r(rSimd4_m1_m2_m3)); // fneg(-x)=x } -#ifdef GMX_SIMD4_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL /* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros) * 1.79998779296875 has mantissa 1100110011001100 (followed by zeros) * 1.26666259765625 has mantissa 0100010001000100 (followed by zeros) @@ -311,3 +313,5 @@ TEST_F(Simd4FloatingpointTest, gmxSimd4Dotproduct3R) } // namespace } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd4_math.cpp b/src/gromacs/simd/tests/simd4_math.cpp index 9a37a73654..ed2fb3a1ae 100644 --- a/src/gromacs/simd/tests/simd4_math.cpp +++ b/src/gromacs/simd/tests/simd4_math.cpp @@ -45,6 +45,8 @@ #include "simd4.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -54,7 +56,7 @@ namespace test /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL class Simd4MathTest : public Simd4Test { @@ -221,3 +223,5 @@ TEST_F(Simd4MathTest, gmxSimd4InvsqrtSingleaccuracyR) } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd4_vector_operations.cpp b/src/gromacs/simd/tests/simd4_vector_operations.cpp index 02a29afd3d..79b7c33709 100644 --- a/src/gromacs/simd/tests/simd4_vector_operations.cpp +++ b/src/gromacs/simd/tests/simd4_vector_operations.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -41,6 +41,8 @@ #include "simd4.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -52,7 +54,7 @@ namespace /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD4_HAVE_REAL +#if GMX_SIMD4_HAVE_REAL /*! \brief Test fixture for SIMD4 vector operations (identical to the SIMD4 \ref Simd4Test) */ typedef Simd4Test Simd4VectorOperationsTest; @@ -76,3 +78,5 @@ TEST_F(Simd4VectorOperationsTest, gmxSimd4CalcRsqR) } // namespace } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd_floatingpoint.cpp b/src/gromacs/simd/tests/simd_floatingpoint.cpp index 8d028a7090..86694fe46e 100644 --- a/src/gromacs/simd/tests/simd_floatingpoint.cpp +++ b/src/gromacs/simd/tests/simd_floatingpoint.cpp @@ -41,6 +41,8 @@ #include "simd.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -52,7 +54,7 @@ namespace /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \brief Test fixture for floating-point tests (identical to the generic \ref SimdTest) */ typedef SimdTest SimdFloatingpointTest; @@ -127,7 +129,7 @@ TEST_F(SimdFloatingpointTest, gmxSimdFnegR) GMX_EXPECT_SIMD_REAL_EQ(rSimd_1_2_3, gmx_simd_fneg_r(rSimd_m1_m2_m3)); // fneg(-x)=x } -#ifdef GMX_SIMD_HAVE_LOGICAL +#if GMX_SIMD_HAVE_LOGICAL /* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros) * 1.79998779296875 has mantissa 1100110011001100 (followed by zeros) * 1.26666259765625 has mantissa 0100010001000100 (followed by zeros) @@ -218,7 +220,7 @@ TEST_F(SimdFloatingpointTest, gmxSimdFractionR) TEST_F(SimdFloatingpointTest, gmxSimdGetExponentR) { GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(60.0, -41.0, 54.0), gmx_simd_get_exponent_r(rSimd_Exp)); -#if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +#if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(587.0, -462.0, 672.0), gmx_simd_get_exponent_r(rSimd_ExpDouble)); #endif } @@ -228,7 +230,7 @@ TEST_F(SimdFloatingpointTest, gmxSimdGetMantissaR) GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(1.219097320577810839026256, 1.166738027848349235071623, 1.168904015004464724825084), gmx_simd_get_mantissa_r(rSimd_Exp)); -#if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +#if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(1.241261238952345623563251, 1.047294723759123852359232, 1.856066204750275957395734), gmx_simd_get_mantissa_r(rSimd_ExpDouble)); @@ -242,7 +244,7 @@ TEST_F(SimdFloatingpointTest, gmxSimdSetExponentR) GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(pow(2.0, 60.0), pow(2.0, -41.0), pow(2.0, 54.0)), gmx_simd_set_exponent_r(setSimdRealFrom3R(60.0, -41.0, 54.0))); -#if (defined GMX_SIMD_HAVE_DOUBLE) && (defined GMX_DOUBLE) +#if GMX_SIMD_HAVE_DOUBLE && defined GMX_DOUBLE GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(pow(2.0, 587.0), pow(2.0, -462.0), pow(2.0, 672.0)), gmx_simd_set_exponent_r(setSimdRealFrom3R(587.0, -462.0, 672.0))); #endif @@ -374,3 +376,5 @@ TEST_F(SimdFloatingpointTest, gmxSimdReduceR) } // namespace } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd_integer.cpp b/src/gromacs/simd/tests/simd_integer.cpp index 4871227a35..0299236d5b 100644 --- a/src/gromacs/simd/tests/simd_integer.cpp +++ b/src/gromacs/simd/tests/simd_integer.cpp @@ -48,6 +48,8 @@ * that all have gmx_simd_ prefixes. */ +#if GMX_SIMD + namespace gmx { namespace test @@ -59,7 +61,7 @@ namespace /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD_HAVE_INT32 +#if GMX_SIMD_HAVE_INT32 /*! \brief Test fixture for integer tests (identical to the generic \ref SimdTest) */ typedef SimdTest SimdIntegerTest; @@ -74,7 +76,7 @@ TEST_F(SimdIntegerTest, gmxSimdSet1I) GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom1I(1), gmx_simd_set1_i(1)); } -#ifdef GMX_SIMD_HAVE_FINT32_ARITHMETICS +#if GMX_SIMD_HAVE_FINT32_ARITHMETICS TEST_F(SimdIntegerTest, gmxSimdAddI) { GMX_EXPECT_SIMD_INT_EQ(iSimd_5_7_9, gmx_simd_add_i(iSimd_1_2_3, iSimd_4_5_6) ); // short add @@ -94,7 +96,7 @@ TEST_F(SimdIntegerTest, gmxSimdMulI) } #endif -#ifdef GMX_SIMD_HAVE_FINT32_LOGICAL +#if GMX_SIMD_HAVE_FINT32_LOGICAL TEST_F(SimdIntegerTest, gmxSimdSlliI) { GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom1I(4194304), gmx_simd_slli_i(gmx_simd_set1_i(2), 21)); // 2 << 21 = 4194304 @@ -126,7 +128,7 @@ TEST_F(SimdIntegerTest, gmxSimdXorI) } #endif -#ifdef GMX_SIMD_HAVE_INT32_EXTRACT +#if GMX_SIMD_HAVE_INT32_EXTRACT TEST_F(SimdIntegerTest, gmxSimdExtractI) { int idata[GMX_SIMD_INT32_WIDTH*2]; @@ -168,7 +170,7 @@ TEST_F(SimdIntegerTest, gmxSimdExtractI) } #endif -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL TEST_F(SimdIntegerTest, gmxSimdCvtR2I) { GMX_EXPECT_SIMD_INT_EQ(setSimdIntFrom1I(4), gmx_simd_cvt_r2i(rSimd_3p75)); @@ -188,7 +190,7 @@ TEST_F(SimdIntegerTest, gmxSimdCvtI2R) } #endif -#ifdef GMX_SIMD_HAVE_FINT32_ARITHMETICS +#if GMX_SIMD_HAVE_FINT32_ARITHMETICS TEST_F(SimdIntegerTest, gmxSimdBoolCmpEqAndBlendZeroI) { gmx_simd_ibool_t eq = gmx_simd_cmpeq_i(iSimd_5_7_9, iSimd_7_8_9); @@ -242,7 +244,7 @@ TEST_F(SimdIntegerTest, gmxSimdBlendvI) } #endif -#if (defined GMX_SIMD_HAVE_REAL) && (defined GMX_SIMD_HAVE_FINT32_ARITHMETICS) +#if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_FINT32_ARITHMETICS TEST_F(SimdIntegerTest, gmxSimdCvtB2IB) { gmx_simd_bool_t eq = gmx_simd_cmpeq_r(rSimd_5_7_9, setSimdRealFrom3R(5, 0, 0)); // eq should be T,F,F @@ -267,3 +269,5 @@ TEST_F(SimdIntegerTest, gmxSimdCvtIB2B) } // namespace } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd_math.cpp b/src/gromacs/simd/tests/simd_math.cpp index e8f40936e0..7bb7549046 100644 --- a/src/gromacs/simd/tests/simd_math.cpp +++ b/src/gromacs/simd/tests/simd_math.cpp @@ -47,6 +47,8 @@ #include "simd.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -56,7 +58,7 @@ namespace test /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL class SimdMathTest : public SimdTest { @@ -764,3 +766,5 @@ TEST_F(SimdMathTest, gmxSimdPmecorrPotentialSingleaccuracyR) } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/tests/simd_vector_operations.cpp b/src/gromacs/simd/tests/simd_vector_operations.cpp index 9393c24c98..628900a77e 100644 --- a/src/gromacs/simd/tests/simd_vector_operations.cpp +++ b/src/gromacs/simd/tests/simd_vector_operations.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014, by the GROMACS development team, led by + * Copyright (c) 2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -41,6 +41,8 @@ #include "simd.h" +#if GMX_SIMD + namespace gmx { namespace test @@ -52,7 +54,7 @@ namespace /*! \addtogroup module_simd */ /*! \{ */ -#ifdef GMX_SIMD_HAVE_REAL +#if GMX_SIMD_HAVE_REAL /*! \internal \brief Test fixture for vector operations tests (identical to the generic \ref SimdTest) */ typedef SimdTest SimdVectorOperationsTest; @@ -121,3 +123,5 @@ TEST_F(SimdVectorOperationsTest, gmxSimdCprodR) } // namespace } // namespace } // namespace + +#endif // GMX_SIMD diff --git a/src/gromacs/simd/vector_operations.h b/src/gromacs/simd/vector_operations.h index 2d8e53ca60..b2a3644a03 100644 --- a/src/gromacs/simd/vector_operations.h +++ b/src/gromacs/simd/vector_operations.h @@ -50,6 +50,8 @@ #include "gromacs/simd/simd.h" +#if GMX_SIMD + /*! \cond libapi */ /*! \addtogroup module_simd */ /*! \{ */ @@ -59,9 +61,9 @@ * check-source to know that this file depends on simd.h (though * symbols like GMX_SIMD_HAVE_FLOAT are actually defined in its * implementation headers). */ -#if (defined GMX_SIMD_HAVE_REAL) || (defined DOXYGEN) +#if GMX_SIMD_HAVE_REAL || defined DOXYGEN -#if (defined GMX_SIMD_HAVE_FLOAT) || (defined DOXYGEN) +#if GMX_SIMD_HAVE_FLOAT || defined DOXYGEN /*! \brief SIMD float inner product of multiple float vectors. * * For normal usage you should always call the real-precision \ref gmx_simd_iprod_r. @@ -157,7 +159,7 @@ gmx_simd_cprod_f(gmx_simd_float_t ax, gmx_simd_float_t ay, gmx_simd_float_t az, } #endif /* GMX_SIMD_HAVE_FLOAT */ -#if (defined GMX_SIMD_HAVE_DOUBLE) || (defined DOXYGEN) +#if GMX_SIMD_HAVE_DOUBLE || defined DOXYGEN /*! \brief SIMD double inner product of multiple double vectors. * * \copydetails gmx_simd_iprod_f @@ -218,7 +220,7 @@ gmx_simd_cprod_d(gmx_simd_double_t ax, gmx_simd_double_t ay, gmx_simd_double_t a #endif /* GMX_SIMD_HAVE_DOUBLE */ -#if (defined GMX_SIMD4_HAVE_FLOAT) || (defined DOXYGEN) +#if GMX_SIMD4_HAVE_FLOAT || defined DOXYGEN /*! \brief SIMD4 float inner product of four float vectors. * * \copydetails gmx_simd_norm2_f @@ -243,7 +245,7 @@ gmx_simd4_norm2_f(gmx_simd4_float_t ax, gmx_simd4_float_t ay, gmx_simd4_float_t #endif /* GMX_SIMD4_HAVE_FLOAT */ -#if (defined GMX_SIMD4_HAVE_DOUBLE) || (defined DOXYGEN) +#if GMX_SIMD4_HAVE_DOUBLE || defined DOXYGEN /*! \brief SIMD4 double norm squared of multiple vectors. * * \copydetails gmx_simd_norm2_f @@ -335,9 +337,11 @@ gmx_simd4_norm2_d(gmx_simd4_double_t ax, gmx_simd4_double_t ay, gmx_simd4_double #endif /* GMX_DOUBLE */ -#endif /* (defined GMX_SIMD_HAVE REAL) || (defined DOXYGEN) */ +#endif /* GMX_SIMD_HAVE REAL || defined DOXYGEN */ /*! \} */ /*! \endcond */ +#endif /* GMX_SIMD */ + #endif /* GMX_SIMD_VECTOR_OPERATIONS_H */ diff --git a/src/gromacs/utility/gmxomp.h b/src/gromacs/utility/gmxomp.h index 553eadc092..52e00d93ed 100644 --- a/src/gromacs/utility/gmxomp.h +++ b/src/gromacs/utility/gmxomp.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -58,9 +58,8 @@ * settings to decide when to use _mm_pause(). This should eventually be * changed into proper detection of the intrinsics uses, not SIMD. */ -#if (defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \ - (defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \ - (defined GMX_SIMD_X86_AVX2_256) +#if GMX_SIMD_X86_SSE2 || GMX_SIMD_X86_SSE4_1 || GMX_SIMD_X86_AVX_128_FMA || \ + GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256 # include #endif #else @@ -148,9 +147,8 @@ static gmx_inline void gmx_pause() * settings to decide when to use _mm_pause(). This should eventually be * changed into proper detection of the intrinsics uses, not SIMD. */ -#if ((defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \ - (defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \ - (defined GMX_SIMD_X86_AVX2_256)) && !defined(__MINGW32__) +#if (GMX_SIMD_X86_SSE2 || GMX_SIMD_X86_SSE4_1 || GMX_SIMD_X86_AVX_128_FMA || \ + GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !defined(__MINGW32__) /* Replace with tbb::internal::atomic_backoff when/if we use TBB */ _mm_pause(); #elif defined __MIC__ -- 2.11.4.GIT