src/gromacs/simd/simd.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_simd SIMD intrinsics interface (simd)
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Provides an architecture-independent way of doing SIMD coding.
  41  *
  42  * Overview of the SIMD implementation is provided in \ref page_simd.
  43  * The details are documented in gromacs/simd/simd.h and the reference
  44  * implementation impl_reference.h.
  45  *
  46  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  47  */
  48
  49 #ifndef GMX_SIMD_SIMD_H
  50 #define GMX_SIMD_SIMD_H
  51
  52 /*! \libinternal \file
  53  *
  54  * \brief Definitions, capabilities, and wrappers for SIMD module.
  55  *
  56  * The macros in this file are intended to be used for writing
  57  * architecture-independent SIMD intrinsics code.
  58  * To support a new architecture, adding a new sub-include with macros here
  59  * should be (nearly) all that is needed.
  60  *
  61  * The defines in this top-level file will set default Gromacs real precision
  62  * operations to either single or double precision based on whether
  63  * GMX_DOUBLE is 1. The actual implementation - including e.g.
  64  * conversion operations specifically between single and double - is documented
  65  * in impl_reference.h.
  66  *
  67  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  68  *
  69  * \inlibraryapi
  70  * \ingroup module_simd
  71  */
  72
   73 #include "config.h"
   74
   75 #include <cstddef>
   76 #include <cstdint>
   77
   78 #include <array>
      #include <type_traits>
   79
   80 #include "gromacs/utility/classhelpers.h"
   81 #include "gromacs/utility/real.h"
  82
  83 //! \cond libapi
  84
  85
  86 /*! \addtogroup module_simd
  87  * \{
  88  */
  89
   90 namespace gmx
   91 {
   92 /*! \libinternal \brief Empty tag type passed to simdLoad(U) to select loading a SimdFloat */
   93 struct SimdFloatTag {};
   94 /*! \libinternal \brief Empty tag type passed to simdLoad(U) to select loading a SimdDouble */
   95 struct SimdDoubleTag {};
   96 /*! \libinternal \brief Empty tag type passed to simdLoad(U) to select loading a SimdFInt32 */
   97 struct SimdFInt32Tag {};
   98 /*! \libinternal \brief Empty tag type passed to simdLoad(U) to select loading a SimdDInt32 */
   99 struct SimdDInt32Tag {};
  100 } // namespace gmx
 101
 102 /*! \name SIMD predefined macros to describe high-level capabilities
 103  *
 104  *  These macros are used to describe the features available in default
 105  *  Gromacs real precision. They are set from the lower-level implementation
 106  *  files that have macros describing single and double precision individually,
 107  *  as well as the implementation details.
 108  *  \{
 109  */
 110
  111 #if GMX_SIMD_X86_SSE2 // Exactly one implementation header gets included: the first branch whose macro is nonzero
  112 #    include "impl_x86_sse2/impl_x86_sse2.h"
  113 #elif GMX_SIMD_X86_SSE4_1
  114 #    include "impl_x86_sse4_1/impl_x86_sse4_1.h"
  115 #elif GMX_SIMD_X86_AVX_128_FMA
  116 #    include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
  117 #elif GMX_SIMD_X86_AVX_256
  118 #    include "impl_x86_avx_256/impl_x86_avx_256.h"
  119 #elif GMX_SIMD_X86_AVX2_256
  120 #    include "impl_x86_avx2_256/impl_x86_avx2_256.h"
  121 #elif GMX_SIMD_X86_AVX2_128
  122 #    include "impl_x86_avx2_128/impl_x86_avx2_128.h"
  123 #elif GMX_SIMD_X86_MIC
  124 #    include "impl_x86_mic/impl_x86_mic.h"
  125 #elif GMX_SIMD_X86_AVX_512
  126 #    include "impl_x86_avx_512/impl_x86_avx_512.h"
  127 #elif GMX_SIMD_X86_AVX_512_KNL
  128 #    include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
  129 #elif GMX_SIMD_ARM_NEON
  130 #    include "impl_arm_neon/impl_arm_neon.h"
  131 #elif GMX_SIMD_ARM_NEON_ASIMD
  132 #    include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
  133 #elif GMX_SIMD_IBM_QPX
  134 #    include "impl_ibm_qpx/impl_ibm_qpx.h"
  135 #elif GMX_SIMD_IBM_VMX
  136 #    include "impl_ibm_vmx/impl_ibm_vmx.h"
  137 #elif GMX_SIMD_IBM_VSX
  138 #    include "impl_ibm_vsx/impl_ibm_vsx.h"
  139 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
  140 #    include "impl_reference/impl_reference.h" // Includes doxygen documentation
  141 #else // None of the macros above is nonzero: use the impl_none header
  142 #    include "impl_none/impl_none.h"
  143 #endif // end of SIMD implementation selection
 144
 145 // The scalar SIMD-mimicking functions are always included so we can use
 146 // templated functions even without SIMD support.
 147 #include "gromacs/simd/scalar/scalar.h"
 148 #include "gromacs/simd/scalar/scalar_math.h"
 149 #include "gromacs/simd/scalar/scalar_util.h"
 150
 151
  152 #if GMX_DOUBLE // Double precision: each REAL/INT32 macro aliases its DOUBLE/DINT32 counterpart (documented in the #else branch)
  153 #    define GMX_SIMD_HAVE_REAL                                     GMX_SIMD_HAVE_DOUBLE
  154 #    define GMX_SIMD_REAL_WIDTH                                    GMX_SIMD_DOUBLE_WIDTH
  155 #    define GMX_SIMD_HAVE_INT32_EXTRACT                            GMX_SIMD_HAVE_DINT32_EXTRACT
  156 #    define GMX_SIMD_HAVE_INT32_LOGICAL                            GMX_SIMD_HAVE_DINT32_LOGICAL
  157 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS                        GMX_SIMD_HAVE_DINT32_ARITHMETICS
  158 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
  159 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL                          GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
  160 #    define GMX_SIMD4_HAVE_REAL                                    GMX_SIMD4_HAVE_DOUBLE
  161 #else // GMX_DOUBLE is 0: each REAL/INT32 macro aliases its FLOAT/FINT32 counterpart
  162
  163 /*! \brief 1 if SimdReal is available, otherwise 0.
  164  *
  165  *  \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
  166  */
  167 #    define GMX_SIMD_HAVE_REAL               GMX_SIMD_HAVE_FLOAT
  168
  169 /*! \brief Width of SimdReal.
  170  *
  171  *  \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
  172  */
  173 #    define GMX_SIMD_REAL_WIDTH              GMX_SIMD_FLOAT_WIDTH
  174
  175 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
  176  *
  177  *  \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
  178  *  \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
  179  */
  180 #    define GMX_SIMD_HAVE_INT32_EXTRACT      GMX_SIMD_HAVE_FINT32_EXTRACT
  181
  182 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
  183  *
  184  *  \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
  185  *  \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
  186  */
  187 #    define GMX_SIMD_HAVE_INT32_LOGICAL      GMX_SIMD_HAVE_FINT32_LOGICAL
  188
  189 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
  190  *
  191  *  \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
  192  *  \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
  193  */
  194 #    define GMX_SIMD_HAVE_INT32_ARITHMETICS  GMX_SIMD_HAVE_FINT32_ARITHMETICS
  195
  196 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
  197  *
  198  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
  199  *  \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
  200  */
  201 #    define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL    GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
  202
  203 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
  204  *
  205  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
  206  *  \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
  207  */
  208 #    define GMX_SIMD_HAVE_HSIMD_UTIL_REAL    GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
  209
  210 /*! \brief 1 if Simd4Real is available, otherwise 0.
  211  *
  212  *  \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
  213  */
  214 #    define GMX_SIMD4_HAVE_REAL              GMX_SIMD4_HAVE_FLOAT
  215
  216 #endif // GMX_DOUBLE
 217
 218 //! \}  end of name-group describing high-level capabilities
 219
 220 namespace gmx
 221 {
 222
  223 template<class T, size_t N> // Primary template is only declared here; it is never defined in this file
  224 struct AlignedArray;        // Specializations for float and double follow, each guarded by its GMX_SIMD_HAVE_* macro
 225
  226 #if GMX_SIMD_HAVE_FLOAT
  227 /*! \libinternal \brief std::array<float, N> aligned to GMX_SIMD_FLOAT_WIDTH*sizeof(float) bytes.
  228  *  Should not be deleted through base pointer (destructor is non-virtual).
  229  */
  230 template<size_t N>
  231 struct alignas(GMX_SIMD_FLOAT_WIDTH*sizeof(float))AlignedArray<float, N> : public std::array<float, N>
  232 {
  233 };
  234 #endif // GMX_SIMD_HAVE_FLOAT
 235
  236 #if GMX_SIMD_HAVE_DOUBLE
  237 /*! \libinternal \brief std::array<double, N> aligned to GMX_SIMD_DOUBLE_WIDTH*sizeof(double) bytes.
  238  *  Should not be deleted through base pointer (destructor is non-virtual).
  239  */
  240 template<size_t N>
  241 struct alignas(GMX_SIMD_DOUBLE_WIDTH*sizeof(double))AlignedArray<double, N> : public std::array<double, N>
  242 {
  243 };
  244 #endif // GMX_SIMD_HAVE_DOUBLE
 245
 246 #if GMX_SIMD_HAVE_REAL
 247
 248 /*! \name SIMD data types
 249  *
 250  *  The actual storage of these types is implementation dependent. The
 251  *  documentation is generated from the reference implementation, but for
 252  *  normal usage this will likely not be what you are using.
 253  * \{
 254  */
 255
  256 /*! \brief Real precision floating-point SIMD datatype.
  257  *
  258  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
  259  *
  260  * Alias for \ref SimdDouble if GMX_DOUBLE is 1, otherwise for \ref SimdFloat.
  261  *
  262  * \note Variables of this type cannot be placed inside other structures or classes,
  263  *       since some compilers (including at least clang-3.7) appear to lose the
  264  *       alignment. This is likely particularly severe when allocating such
  265  *       memory on the heap, but it occurs for stack structures too.
  266  */
  267 #    if GMX_DOUBLE
  268 typedef SimdDouble               SimdReal;
  269 #    else
  270 typedef SimdFloat                SimdReal;
  271 #    endif // GMX_DOUBLE
 272
 273
  274 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
  275  *
  276  * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
  277  *
  278  * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
  279  * internally, otherwise \ref SimdFBool. This is necessary since some
  280  * SIMD implementations use bitpatterns for marking truth, so single-
  281  * vs. double precision booleans are not necessarily exchangeable.
  282  * As long as you just use this type you will not have to worry about precision.
  283  *
  284  * See \ref SimdIBool for an explanation of real vs. integer booleans.
  285  *
  286  * \note Variables of this type cannot be placed inside other structures or classes,
  287  *       since some compilers (including at least clang-3.7) appear to lose the
  288  *       alignment. This is likely particularly severe when allocating such
  289  *       memory on the heap, but it occurs for stack structures too.
  290  */
  291 #    if GMX_DOUBLE
  292 typedef SimdDBool                SimdBool;
  293 #    else
  294 typedef SimdFBool                SimdBool;
  295 #    endif // GMX_DOUBLE
 296
 297
  298 /*! \brief 32-bit integer SIMD type.
  299  *
  300  * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
  301  * internally, otherwise \ref SimdFInt32. This might seem a strange
  302  * implementation detail, but it is because some SIMD implementations use
  303  * different types/widths of integer registers when converting from
  304  * double vs. single precision floating point. As long as you just use
  305  * this type you will not have to worry about precision.
  306  *
  307  * \note Variables of this type cannot be placed inside other structures or classes,
  308  *       since some compilers (including at least clang-3.7) appear to lose the
  309  *       alignment. This is likely particularly severe when allocating such
  310  *       memory on the heap, but it occurs for stack structures too.
  311  */
  312 #    if GMX_DOUBLE
  313 typedef SimdDInt32               SimdInt32;
  314 #    else
  315 typedef SimdFInt32               SimdInt32;
  316 #    endif // GMX_DOUBLE
 317
  318 #if GMX_SIMD_HAVE_INT32_ARITHMETICS
  319 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
  320  *
  321  * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
  322  *
  323  * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
  324  * internally, otherwise \ref SimdFIBool. This is necessary since some
  325  * SIMD implementations use bitpatterns for marking truth, so single-
  326  * vs. double precision booleans are not necessarily exchangeable, and while
  327  * a double-precision boolean might be represented with a 64-bit mask, the
  328  * corresponding integer might only use a 32-bit mask.
  329  *
  330  * We provide conversion routines for these cases, so the only thing you need to
  331  * keep in mind is to use \ref SimdBool when working with
  332  * \ref SimdReal while you pick \ref SimdIBool when working with
  333  * \ref SimdInt32 .
  334  *
  335  * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
  336  *
  337  * \note Variables of this type cannot be placed inside other structures or classes,
  338  *       since some compilers (including at least clang-3.7) appear to lose the
  339  *       alignment. This is likely particularly severe when allocating such
  340  *       memory on the heap, but it occurs for stack structures too.
  341  */
  342 #    if GMX_DOUBLE
  343 typedef SimdDIBool               SimdIBool;
  344 #    else
  345 typedef SimdFIBool               SimdIBool;
  346 #    endif // GMX_DOUBLE
  347 #endif  // GMX_SIMD_HAVE_INT32_ARITHMETICS
 348
 349
  350 #if GMX_DOUBLE // c_simdBestPairAlignment: real-precision alias of the Double/Float constant, selected by GMX_DOUBLE
  351 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
  352 #else // NOTE(review): the underlying constants are not defined in this file; presumably they come from the implementation header
  353 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
  354 #endif // GMX_DOUBLE
 355
 356 #endif  // GMX_SIMD_HAVE_REAL
 357
 358 #if GMX_SIMD4_HAVE_REAL
  359 /*! \brief Real precision floating-point SIMD4 datatype.
  360  *
  361  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
  362  *
  363  * Alias for \ref Simd4Double if GMX_DOUBLE is 1, otherwise for \ref Simd4Float.
  364  *
  365  * \note Variables of this type cannot be placed inside other structures or classes,
  366  *       since some compilers (including at least clang-3.7) appear to lose the
  367  *       alignment. This is likely particularly severe when allocating such
  368  *       memory on the heap, but it occurs for stack structures too.
  369  */
  370 #    if GMX_DOUBLE
  371 typedef Simd4Double               Simd4Real;
  372 #    else
  373 typedef Simd4Float                Simd4Real;
  374 #    endif // GMX_DOUBLE
 375
 376
  377 /*! \brief Boolean SIMD4 type for usage with \ref Simd4Real.
  378  *
  379  * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
  380  *
  381  * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
  382  * internally, otherwise \ref Simd4FBool. This is necessary since some
  383  * SIMD implementations use bitpatterns for marking truth, so single-
  384  * vs. double precision booleans are not necessarily exchangeable.
  385  * As long as you just use this type you will not have to worry about precision.
  386  *
  387  * \note Variables of this type cannot be placed inside other structures or classes,
  388  *       since some compilers (including at least clang-3.7) appear to lose the
  389  *       alignment. This is likely particularly severe when allocating such
  390  *       memory on the heap, but it occurs for stack structures too.
  391  */
  392 #    if GMX_DOUBLE
  393 typedef Simd4DBool                Simd4Bool;
  394 #    else
  395 typedef Simd4FBool                Simd4Bool;
  396 #    endif // GMX_DOUBLE
 397 #endif // GMX_SIMD4_HAVE_REAL
 398
 399 //! \}  end of name-group describing SIMD data types
 400
 401 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
 402  * \{
 403  */
 404
  405 /*! \libinternal \brief Simd traits: scalar memory type (type), SIMD width (width) and load tag (tag) of a SIMD type; the primary template is empty */
  406 template<typename T>
  407 struct SimdTraits {};
  408
  409 #if GMX_SIMD_HAVE_FLOAT
  410 template<> // Traits for SimdFloat: float elements, GMX_SIMD_FLOAT_WIDTH wide
  411 struct SimdTraits<SimdFloat>
  412 {
  413     using type = float;
  414     static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
  415     using tag = SimdFloatTag;
  416 };
  417 #endif
  418 #if GMX_SIMD_HAVE_DOUBLE
  419 template<> // Traits for SimdDouble: double elements, GMX_SIMD_DOUBLE_WIDTH wide
  420 struct SimdTraits<SimdDouble>
  421 {
  422     using type = double;
  423     static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
  424     using tag = SimdDoubleTag;
  425 };
  426 #endif
  427 #if GMX_SIMD_HAVE_FLOAT
  428 template<> // Traits for SimdFInt32: int elements, GMX_SIMD_FINT32_WIDTH wide
  429 struct SimdTraits<SimdFInt32>
  430 {
  431     using type = int;
  432     static constexpr int width = GMX_SIMD_FINT32_WIDTH;
  433     using tag = SimdFInt32Tag;
  434 };
  435 #endif
  436 #if GMX_SIMD_HAVE_DOUBLE
  437 template<> // Traits for SimdDInt32: int elements, GMX_SIMD_DINT32_WIDTH wide
  438 struct SimdTraits<SimdDInt32>
  439 {
  440     using type = int;
  441     static constexpr int width = GMX_SIMD_DINT32_WIDTH;
  442     using tag = SimdDInt32Tag;
  443 };
  444 #endif
  445
  446 template<typename T> // Traits for const T: const-qualified element type, same width and tag as SimdTraits<T>
  447 struct SimdTraits<const T>
  448 {
  449     using type = const typename SimdTraits<T>::type;
  450     static constexpr int width = SimdTraits<T>::width;
  451     using tag = typename SimdTraits<T>::tag;
  452 };
 453
  454 /*! \brief Load function that returns SIMD or scalar; this overload handles SIMD types
  455  *         by forwarding to simdLoad() with the tag taken from SimdTraits<T>.
  456  * \tparam T Type to load (must be given explicitly; it cannot be deduced from \p m)
  457  * \param  m Pointer to aligned memory
  458  * \return   Loaded value
  459  */
  460 template<typename T>
  461 static inline T
  462 load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
  463 {
  464     return simdLoad(m, typename SimdTraits<T>::tag());
  465 }
 466
  467 template<typename T> // Scalar overload of load(): for arithmetic T it simply returns *m
  468 static inline T
  469 /* the enable_if serves to prevent two different types of misuse:
  470  * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
  471  * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
  472  *    created. The dependent type disables type deduction.
  473  */
  474 load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
  475 {
  476     return *m;
  477 }
 478
  479 template <typename T, size_t N> // AlignedArray overload of load(): T is explicit, N is deduced from the argument
  480 static inline T gmx_simdcall
  481 load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
  482 {
  483     return simdLoad(m.data(), typename SimdTraits<T>::tag()); // loads from the start of the array's storage
  484 }
 485
  486 /*! \brief Load function that returns SIMD or scalar based on template argument; this
  487  *         overload handles SIMD types by forwarding to simdLoadU() with the tag from SimdTraits<T>.
  488  * \tparam T Type to load (must be given explicitly; it cannot be deduced from \p m)
  489  * \param m Pointer to unaligned memory
  490  * \return Loaded SimdFloat/Double/Int or basic scalar type
  491  */
  492 template<typename T>
  493 static inline T
  494 loadU(const typename SimdTraits<T>::type *m)
  495 {
  496     return simdLoadU(m, typename SimdTraits<T>::tag());
  497 }
 498
  499 template<typename T> // Scalar overload of loadU(): for arithmetic T it simply returns *m
  500 static inline T
  501 loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m) // dependent type: T must be given explicitly
  502 {
  503     return *m;
  504 }
 505
  506 template <typename T, size_t N> // AlignedArray overload of loadU(): T is explicit, N is deduced from the argument
  507 static inline T gmx_simdcall
  508 loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
  509 {
  510     return simdLoadU(m.data(), typename SimdTraits<T>::tag()); // loads from the start of the array's storage
  511 }
 512
  513 class SimdSetZeroProxyInternal; // Forward declaration, needed for the setZero() declaration below
  514
  515 static inline const SimdSetZeroProxyInternal gmx_simdcall
  516 setZero(); // Declared ahead of the class, which names this function as a friend
 517
  518 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
  519  *
  520  * This object is returned by setZero(), and depending on what type you assign
  521  * the result to, the matching conversion method will call the right low-level function.
  522  */
  523 class SimdSetZeroProxyInternal
  524 {
  525     public:
  526         //!\brief Conversion method that returns 0.0 as float
  527         operator float() const { return 0.0f; }
  528         //!\brief Conversion method that returns 0.0 as double
  529         operator double() const { return 0.0; }
  530         //!\brief Conversion method that returns 0 as int32
  531         operator std::int32_t() const { return 0; }
  532 #if GMX_SIMD_HAVE_FLOAT
  533         //!\brief Conversion method that will execute setZeroF() for SimdFloat
  534         operator SimdFloat() const { return setZeroF(); }
  535         //!\brief Conversion method that will execute setZeroFI() for SimdFInt32
  536         operator SimdFInt32() const { return setZeroFI(); }
  537 #endif
  538 #if GMX_SIMD4_HAVE_FLOAT
  539         //!\brief Conversion method that will execute simd4SetZeroF() for Simd4Float
  540         operator Simd4Float() const { return simd4SetZeroF(); }
  541 #endif
  542 #if GMX_SIMD_HAVE_DOUBLE
  543         //!\brief Conversion method that will execute setZeroD() for SimdDouble
  544         operator SimdDouble() const { return setZeroD(); }
  545         //!\brief Conversion method that will execute setZeroDI() for SimdDInt32
  546         operator SimdDInt32() const { return setZeroDI(); }
  547 #endif
  548 #if GMX_SIMD4_HAVE_DOUBLE
  549         //!\brief Conversion method that will execute simd4SetZeroD() for Simd4Double
  550         operator Simd4Double() const { return simd4SetZeroD(); }
  551 #endif
  552
  553     private:
  554         //! \brief Private constructor can only be called from setZero()
  555         SimdSetZeroProxyInternal() {}
  556
  557         friend const SimdSetZeroProxyInternal gmx_simdcall
  558         setZero();
  559
  560         GMX_DISALLOW_COPY_AND_ASSIGN(SimdSetZeroProxyInternal); // NOTE(review): macro is not defined in this file; presumably from classhelpers.h and disables copying - confirm
  561 };
 562
  563 /*! \brief Returns a proxy object that can set any supported SIMD or scalar variable to zero
  564  *
  565  * \return Proxy object that will call the actual function to set a SIMD/scalar
  566  *         variable to zero based on the conversion function called when you
  567  *         assign the result.
  568  */
  569 static inline const SimdSetZeroProxyInternal gmx_simdcall
  570 setZero()
  571 {
  572     return {}; // list-initializes the returned proxy in place via its private default constructor (setZero() is a friend)
  573 }
 574
  575 /* Implement most of 4xn functions by forwarding them to other functions when possible.
  576  * The functions forwarded here don't need to be implemented by each implementation.
  577  * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
  578  */
  579 #if GMX_SIMD_HAVE_FLOAT
  580 #if GMX_SIMD_FLOAT_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
  581 #define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
  582 #elif GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
  583 #define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 1
  584 //For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
  585 //GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
  586 #endif
  587
  588 #if GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
  589 static inline SimdFloat gmx_simdcall
  590 loadUNDuplicate4(const float* f) // width 4: builds the result from the single value *f
  591 {
  592     return SimdFloat(*f); // NOTE(review): presumably broadcasts *f to all elements - confirm against the SimdFloat constructor
  593 }
  594 static inline SimdFloat gmx_simdcall
  595 load4DuplicateN(const float* f) // width 4: forwards to the aligned load<SimdFloat>()
  596 {
  597     return load<SimdFloat>(f);
  598 }
  599 static inline SimdFloat gmx_simdcall
  600 loadU4NOffset(const float* f, int) // width 4: forwards to loadU<SimdFloat>(); the int argument is unused
  601 {
  602     return loadU<SimdFloat>(f);
  603 }
  604 #elif GMX_SIMD_FLOAT_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT && GMX_SIMD_HAVE_LOADU
  605 static inline SimdFloat gmx_simdcall
  606 loadUNDuplicate4(const float* f) // width 8: forwards to loadU1DualHsimd()
  607 {
  608     return loadU1DualHsimd(f);
  609 }
  610 static inline SimdFloat gmx_simdcall
  611 load4DuplicateN(const float* f) // width 8: forwards to loadDuplicateHsimd()
  612 {
  613     return loadDuplicateHsimd(f);
  614 }
  615 #endif
  616 #else //GMX_SIMD_HAVE_FLOAT
  617 #define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
  618 #endif
 619
  620 #if GMX_SIMD_HAVE_DOUBLE
  621 #if GMX_SIMD_DOUBLE_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
  622 #define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
  623 #elif GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
  624 #define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 1
  625 //For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
  626 //GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
  627 #endif
  628
  629 #if GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
  630 static inline SimdDouble gmx_simdcall
  631 loadUNDuplicate4(const double* f) // width 4: builds the result from the single value *f
  632 {
  633     return SimdDouble(*f); // NOTE(review): presumably broadcasts *f to all elements - confirm against the SimdDouble constructor
  634 }
  635 static inline SimdDouble gmx_simdcall
  636 load4DuplicateN(const double* f) // width 4: forwards to the aligned load<SimdDouble>()
  637 {
  638     return load<SimdDouble>(f);
  639 }
  640 static inline SimdDouble gmx_simdcall
  641 loadU4NOffset(const double* f, int) // width 4: forwards to loadU<SimdDouble>(); the int argument is unused
  642 {
  643     return loadU<SimdDouble>(f);
  644 }
  645 #elif GMX_SIMD_DOUBLE_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE && GMX_SIMD_HAVE_LOADU
  646 static inline SimdDouble gmx_simdcall
  647 loadUNDuplicate4(const double* f) // width 8: forwards to loadU1DualHsimd()
  648 {
  649     return loadU1DualHsimd(f);
  650 }
  651 static inline SimdDouble gmx_simdcall
  652 load4DuplicateN(const double* f) // width 8: forwards to loadDuplicateHsimd()
  653 {
  654     return loadDuplicateHsimd(f);
  655 }
  656 #endif
  657 #else //GMX_SIMD_HAVE_DOUBLE
  658 #define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
  659 #endif
 660
  661 #if GMX_DOUBLE // Real-precision alias of the 4xN capability flag, selected by GMX_DOUBLE
  662 #define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
  663 #else
  664 #define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
  665 #endif // GMX_DOUBLE
 666
 667 //! \}  end of name-group proxy objects
 668
 669 }      // namespace gmx
 670
  671 //! \}          end of module_simd
 672
 673 //! \endcond   end of condition libapi
 674
 675
 676 #if GMX_SIMD_HAVE_FLOAT
 677
 678 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
 679  *
 680  * \param[in] ptr  A pointer to a float
 681  */
 682 static inline bool isSimdAligned(const float *ptr)
 683 {
 684     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH*sizeof(float)) == 0;
 685 }
 686
 687 #endif // GMX_SIMD_HAVE_FLOAT
 688
 689 #if GMX_SIMD_HAVE_DOUBLE
 690
 691 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
 692  *
 693  * \param[in] ptr  A pointer to a double
 694  */
 695 static inline bool isSimdAligned(const double *ptr)
 696 {
 697     return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH*sizeof(double)) == 0;
 698 }
 699
 700 #endif // GMX_SIMD_HAVE_DOUBLE
 701
 702
  703 #if GMX_SIMD_HAVE_REAL // Compile-time guard: the SIMD real width must not exceed GMX_REAL_MAX_SIMD_WIDTH
  704 #if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
  705 #error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
  706 #endif
  707 #endif // GMX_SIMD_HAVE_REAL
 708
 709
  710 #if 0
  711 /* This is a hack to cover the corner case of using an
  712    explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
  713    GMX_SIMD_HAVE_REAL.
  714
  715    Such code is expected to include simd.h to get those symbols
  716    defined, but the actual definitions are in the implementation headers
  717    included by simd.h. check-source.py is not a full preprocessor, so
  718    it does not see the definitions in the implementation headers as
  719    belonging to simd.h, thus it cannot check that simd.h is being used
  720    correctly in the above hypothetical corner case. However, the
  721    checker also does not parse #if 0, so we can fool the checker into
  722    thinking that definition occurs here, and that will work well
  723    enough.
  724
  725    If there are ever other kinds of SIMD code that might have the same
  726    problem, we might want to add other variables here.
  727  */
  728 #    define GMX_SIMD_HAVE_FLOAT         1
  729 #    define GMX_SIMD_HAVE_DOUBLE        1
  730
  731 #endif // end of hack
 732
 733 // The ArrayRef<SimdReal> specialization is always included, because compiler
 734 // errors are confusing when template specialization aren't available.
 735 #include "gromacs/simd/simd_memory.h"
 736
 737 #endif // GMX_SIMD_SIMD_H