PME-gather: 4xN SIMD
[gromacs/AngularHB.git] / src / gromacs / simd / simd.h
blob45d4baf6daf7687f62b3bb1474ed579312ad920a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal
37 * \defgroup module_simd SIMD intrinsics interface (simd)
38 * \ingroup group_utilitymodules
40 * \brief Provides an architecture-independent way of doing SIMD coding.
42 * Overview of the SIMD implementation is provided in \ref page_simd.
43 * The details are documented in gromacs/simd/simd.h and the reference
44 * implementation impl_reference.h.
46 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
49 #ifndef GMX_SIMD_SIMD_H
50 #define GMX_SIMD_SIMD_H
52 /*! \libinternal \file
54 * \brief Definitions, capabilities, and wrappers for SIMD module.
56 * The macros in this file are intended to be used for writing
57 * architecture-independent SIMD intrinsics code.
58 * To support a new architecture, adding a new sub-include with macros here
59 * should be (nearly) all that is needed.
61 * The defines in this top-level file will set default Gromacs real precision
62 * operations to either single or double precision based on whether
63 * GMX_DOUBLE is 1. The actual implementation - including e.g.
64 * conversion operations specifically between single and double - is documented
65 * in impl_reference.h.
67 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
69 * \inlibraryapi
70 * \ingroup module_simd
73 #include "config.h"
75 #include <cstddef>
76 #include <cstdint>
78 #include <array>
79 #include <type_traits>
80 #include "gromacs/utility/classhelpers.h"
81 #include "gromacs/utility/real.h"
83 //! \cond libapi
86 /*! \addtogroup module_simd
87 * \{
90 namespace gmx
92 /*! \libinternal \brief Tag type that selects the SimdFloat overload of simdLoad() and simdLoadU() */
93 struct SimdFloatTag {};
94 /*! \libinternal \brief Tag type that selects the SimdDouble overload of simdLoad() and simdLoadU() */
95 struct SimdDoubleTag {};
96 /*! \libinternal \brief Tag type that selects the SimdFInt32 overload of simdLoad() and simdLoadU() */
97 struct SimdFInt32Tag {};
98 /*! \libinternal \brief Tag type that selects the SimdDInt32 overload of simdLoad() and simdLoadU() */
99 struct SimdDInt32Tag {};
102 /*! \name SIMD predefined macros to describe high-level capabilities
104 * These macros are used to describe the features available in default
105 * Gromacs real precision. They are set from the lower-level implementation
106 * files that have macros describing single and double precision individually,
107 * as well as the implementation details.
108 * \{
111 #if GMX_SIMD_X86_SSE2
112 # include "impl_x86_sse2/impl_x86_sse2.h"
113 #elif GMX_SIMD_X86_SSE4_1
114 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
115 #elif GMX_SIMD_X86_AVX_128_FMA
116 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
117 #elif GMX_SIMD_X86_AVX_256
118 # include "impl_x86_avx_256/impl_x86_avx_256.h"
119 #elif GMX_SIMD_X86_AVX2_256
120 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
121 #elif GMX_SIMD_X86_AVX2_128
122 # include "impl_x86_avx2_128/impl_x86_avx2_128.h"
123 #elif GMX_SIMD_X86_MIC
124 # include "impl_x86_mic/impl_x86_mic.h"
125 #elif GMX_SIMD_X86_AVX_512
126 # include "impl_x86_avx_512/impl_x86_avx_512.h"
127 #elif GMX_SIMD_X86_AVX_512_KNL
128 # include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
129 #elif GMX_SIMD_ARM_NEON
130 # include "impl_arm_neon/impl_arm_neon.h"
131 #elif GMX_SIMD_ARM_NEON_ASIMD
132 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
133 #elif GMX_SIMD_IBM_QPX
134 # include "impl_ibm_qpx/impl_ibm_qpx.h"
135 #elif GMX_SIMD_IBM_VMX
136 # include "impl_ibm_vmx/impl_ibm_vmx.h"
137 #elif GMX_SIMD_IBM_VSX
138 # include "impl_ibm_vsx/impl_ibm_vsx.h"
139 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
140 # include "impl_reference/impl_reference.h" // Includes doxygen documentation
141 #else
142 # include "impl_none/impl_none.h"
143 #endif
145 // The scalar SIMD-mimicking functions are always included so we can use
146 // templated functions even without SIMD support.
147 #include "gromacs/simd/scalar/scalar.h"
148 #include "gromacs/simd/scalar/scalar_math.h"
149 #include "gromacs/simd/scalar/scalar_util.h"
152 #if GMX_DOUBLE // double precision: each *_REAL / *_INT32 macro aliases its *_DOUBLE / *_DINT32 counterpart
153 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
154 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
155 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
156 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
157 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
158 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
159 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
160 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
161 #else // GMX_DOUBLE is 0: single precision; the Doxygen documentation of these macros is attached to this branch
163 /*! \brief 1 if SimdReal is available, otherwise 0.
165 * \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
167 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
169 /*! \brief Width of SimdReal.
171 * \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
173 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
175 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
177 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
178 * \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
180 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
182 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
184 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
185 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
187 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
189 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
191 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
192 * \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
194 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
196 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
198 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
199 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
201 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
203 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
205 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
206 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
208 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
210 /*! \brief 1 if Simd4Real is available, otherwise 0.
212 * \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
214 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
216 #endif // GMX_DOUBLE
218 //! \} end of name-group describing high-level capabilities
220 namespace gmx
223 template<class T, size_t N>
224 struct AlignedArray;
226 #if GMX_SIMD_HAVE_FLOAT
227 /*! \libinternal \brief Identical to std::array, but aligned to GMX_SIMD_FLOAT_WIDTH*sizeof(float) bytes.
228 * Should not be deleted through base pointer (destructor is non-virtual).
230 template<size_t N>
231 struct alignas(GMX_SIMD_FLOAT_WIDTH*sizeof(float))AlignedArray<float, N> : public std::array<float, N>
234 #endif
236 #if GMX_SIMD_HAVE_DOUBLE
237 /*! \libinternal \brief Identical to std::array, but aligned to GMX_SIMD_DOUBLE_WIDTH*sizeof(double) bytes.
238 * Should not be deleted through base pointer (destructor is non-virtual).
240 template<size_t N>
241 struct alignas(GMX_SIMD_DOUBLE_WIDTH*sizeof(double))AlignedArray<double, N> : public std::array<double, N>
244 #endif
246 #if GMX_SIMD_HAVE_REAL
248 /*! \name SIMD data types
250 * The actual storage of these types is implementation dependent. The
251 * documentation is generated from the reference implementation, but for
252 * normal usage this will likely not be what you are using.
253 * \{
256 /*! \brief Real precision floating-point SIMD datatype.
258 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
260 * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
262 * \note This variable cannot be placed inside other structures or classes, since
263 * some compilers (including at least clang-3.7) appear to lose the
264 * alignment. This is likely particularly severe when allocating such
265 * memory on the heap, but it occurs for stack structures too.
267 # if GMX_DOUBLE
268 typedef SimdDouble SimdReal;
269 # else
270 typedef SimdFloat SimdReal;
271 # endif
274 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
276 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
278 * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
279 * internally, otherwise \ref SimdFBool. This is necessary since some
280 * SIMD implementations use bitpatterns for marking truth, so single-
281 * vs. double precision booleans are not necessarily exchangeable.
282 * As long as you just use this type you will not have to worry about precision.
284 * See \ref SimdIBool for an explanation of real vs. integer booleans.
286 * \note This variable cannot be placed inside other structures or classes, since
287 * some compilers (including at least clang-3.7) appear to lose the
288 * alignment. This is likely particularly severe when allocating such
289 * memory on the heap, but it occurs for stack structures too.
291 # if GMX_DOUBLE
292 typedef SimdDBool SimdBool;
293 # else
294 typedef SimdFBool SimdBool;
295 # endif
298 /*! \brief 32-bit integer SIMD type.
300 * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
301 * internally, otherwise \ref SimdFInt32. This might seem a strange
302 * implementation detail, but it is because some SIMD implementations use
303 * different types/widths of integers registers when converting from
304 * double vs. single precision floating point. As long as you just use
305 * this type you will not have to worry about precision.
307 * \note This variable cannot be placed inside other structures or classes, since
308 * some compilers (including at least clang-3.7) appear to lose the
309 * alignment. This is likely particularly severe when allocating such
310 * memory on the heap, but it occurs for stack structures too.
312 # if GMX_DOUBLE
313 typedef SimdDInt32 SimdInt32;
314 # else
315 typedef SimdFInt32 SimdInt32;
316 # endif
318 #if GMX_SIMD_HAVE_INT32_ARITHMETICS
319 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
321 * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
323 * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
324 * internally, otherwise \ref SimdFIBool. This is necessary since some
325 * SIMD implementations use bitpatterns for marking truth, so single-
326 * vs. double precision booleans are not necessarily exchangeable, and while
327 * a double-precision boolean might be represented with a 64-bit mask, the
328 * corresponding integer might only use a 32-bit mask.
330 * We provide conversion routines for these cases, so the only thing you need to
331 * keep in mind is to use \ref SimdBool when working with
332 * \ref SimdReal while you pick \ref SimdIBool when working with
333 * \ref SimdInt32 .
335 * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
337 * \note This variable cannot be placed inside other structures or classes, since
338 * some compilers (including at least clang-3.7) appear to lose the
339 * alignment. This is likely particularly severe when allocating such
340 * memory on the heap, but it occurs for stack structures too.
342 # if GMX_DOUBLE
343 typedef SimdDIBool SimdIBool;
344 # else
345 typedef SimdFIBool SimdIBool;
346 # endif
347 #endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
350 #if GMX_DOUBLE
351 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
352 #else
353 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
354 #endif
356 #endif // GMX_SIMD_HAVE_REAL
358 #if GMX_SIMD4_HAVE_REAL
359 /*! \brief Real precision floating-point SIMD4 datatype.
361 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
363 * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
365 * \note This variable cannot be placed inside other structures or classes, since
366 * some compilers (including at least clang-3.7) appear to lose the
367 * alignment. This is likely particularly severe when allocating such
368 * memory on the heap, but it occurs for stack structures too.
370 # if GMX_DOUBLE
371 typedef Simd4Double Simd4Real;
372 # else
373 typedef Simd4Float Simd4Real;
374 # endif
377 /*! \brief Boolean SIMD4 type for usage with \ref SimdReal.
379 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
381 * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
382 * internally, otherwise \ref Simd4FBool. This is necessary since some
383 * SIMD implementations use bitpatterns for marking truth, so single-
384 * vs. double precision booleans are not necessarily exchangeable.
385 * As long as you just use this type you will not have to worry about precision.
387 * \note This variable cannot be placed inside other structures or classes, since
388 * some compilers (including at least clang-3.7) appear to lose the
389 * alignment. This is likely particularly severe when allocating such
390 * memory on the heap, but it occurs for stack structures too.
392 # if GMX_DOUBLE
393 typedef Simd4DBool Simd4Bool;
394 # else
395 typedef Simd4FBool Simd4Bool;
396 # endif
397 #endif // GMX_SIMD4_HAVE_REAL
399 //! \} end of name-group describing SIMD data types
401 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
402 * \{
/*! \libinternal \brief Compile-time description of a SIMD type.
 *
 * The primary template is intentionally empty. Each specialization provides
 *  - \c type  : the scalar element type,
 *  - \c width : the element count, taken from the matching GMX_SIMD_*_WIDTH macro,
 *  - \c tag   : the tag type associated with the SIMD type.
 */
template<typename T>
struct SimdTraits
{
};

#if GMX_SIMD_HAVE_FLOAT
//! \libinternal \brief Traits of SimdFloat.
template<>
struct SimdTraits<SimdFloat>
{
    typedef float        type;
    typedef SimdFloatTag tag;
    static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
};
#endif
#if GMX_SIMD_HAVE_DOUBLE
//! \libinternal \brief Traits of SimdDouble.
template<>
struct SimdTraits<SimdDouble>
{
    typedef double        type;
    typedef SimdDoubleTag tag;
    static constexpr int  width = GMX_SIMD_DOUBLE_WIDTH;
};
#endif
#if GMX_SIMD_HAVE_FLOAT
//! \libinternal \brief Traits of SimdFInt32.
template<>
struct SimdTraits<SimdFInt32>
{
    typedef int           type;
    typedef SimdFInt32Tag tag;
    static constexpr int  width = GMX_SIMD_FINT32_WIDTH;
};
#endif
#if GMX_SIMD_HAVE_DOUBLE
//! \libinternal \brief Traits of SimdDInt32.
template<>
struct SimdTraits<SimdDInt32>
{
    typedef int           type;
    typedef SimdDInt32Tag tag;
    static constexpr int  width = GMX_SIMD_DINT32_WIDTH;
};
#endif

/*! \libinternal \brief Traits of a const-qualified SIMD type.
 *
 * Same width and tag as the non-const type; the element type gains a const.
 */
template<typename T>
struct SimdTraits<const T>
{
    typedef const typename SimdTraits<T>::type type;
    typedef typename SimdTraits<T>::tag        tag;
    static constexpr int                       width = SimdTraits<T>::width;
};
454 /*! \brief Load function that returns SIMD or scalar
456 * \tparam T Type to load (type is always mandatory)
457 * \param m Pointer to aligned memory
458 * \return Loaded value
460 template<typename T>
461 static inline T
462 load(const typename SimdTraits<T>::type *m) //disabled by SFINAE for non-SIMD types
464 return simdLoad(m, typename SimdTraits<T>::tag());
467 template<typename T>
468 static inline T
469 /* the enable_if serves to prevent two different type of misuse:
470 * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
471 * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
472 * created. The dependent type disables type deduction.
474 load(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
476 return *m;
479 template <typename T, size_t N>
480 static inline T gmx_simdcall
481 load(const AlignedArray<typename SimdTraits<T>::type, N> &m)
483 return simdLoad(m.data(), typename SimdTraits<T>::tag());
486 /*! \brief Load function that returns SIMD or scalar based on template argument
488 * \tparam T Type to load (type is always mandatory)
489 * \param m Pointer to unaligned memory
490 * \return Loaded SimdFloat/Double/Int or basic scalar type
492 template<typename T>
493 static inline T
494 loadU(const typename SimdTraits<T>::type *m)
496 return simdLoadU(m, typename SimdTraits<T>::tag());
499 template<typename T>
500 static inline T
501 loadU(const typename std::enable_if<std::is_arithmetic<T>::value, T>::type *m)
503 return *m;
506 template <typename T, size_t N>
507 static inline T gmx_simdcall
508 loadU(const AlignedArray<typename SimdTraits<T>::type, N> &m)
510 return simdLoadU(m.data(), typename SimdTraits<T>::tag());
513 class SimdSetZeroProxyInternal;
515 static inline const SimdSetZeroProxyInternal gmx_simdcall
516 setZero();
518 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
520 * This object is returned by setZero(), and depending on what type you assign
521 * the result to the conversion method will call the right low-level function.
523 class SimdSetZeroProxyInternal
525 public:
526 //!\brief Conversion method that returns 0.0 as float
527 operator float() const { return 0.0f; }
528 //!\brief Conversion method that returns 0.0 as double
529 operator double() const { return 0.0; }
530 //!\brief Conversion method that returns 0.0 as int32
531 operator std::int32_t() const { return 0; }
532 #if GMX_SIMD_HAVE_FLOAT
533 //!\brief Conversion method that will execute setZero() for SimdFloat
534 operator SimdFloat() const { return setZeroF(); }
535 //!\brief Conversion method that will execute setZero() for SimdFInt32
536 operator SimdFInt32() const { return setZeroFI(); }
537 #endif
538 #if GMX_SIMD4_HAVE_FLOAT
539 //!\brief Conversion method that will execute setZero() for Simd4Float
540 operator Simd4Float() const { return simd4SetZeroF(); }
541 #endif
542 #if GMX_SIMD_HAVE_DOUBLE
543 //!\brief Conversion method that will execute setZero() for SimdDouble
544 operator SimdDouble() const { return setZeroD(); }
545 //!\brief Conversion method that will execute setZero() for SimdDInt32
546 operator SimdDInt32() const { return setZeroDI(); }
547 #endif
548 #if GMX_SIMD4_HAVE_DOUBLE
549 //!\brief Conversion method that will execute setZero() for Simd4Double
550 operator Simd4Double() const { return simd4SetZeroD(); }
551 #endif
553 private:
554 //! \brief Private constructor can only be called from setZero()
555 SimdSetZeroProxyInternal() {}
557 friend const SimdSetZeroProxyInternal gmx_simdcall
558 setZero();
560 GMX_DISALLOW_COPY_AND_ASSIGN(SimdSetZeroProxyInternal);
563 /*! \brief Proxy object to set any SIMD or scalar variable to zero
565 * \return Proxy object that will call the actual function to set a SIMD/scalar
566 * variable to zero based on the conversion function called when you
567 * assign the result.
569 static inline const SimdSetZeroProxyInternal gmx_simdcall
570 setZero()
572 return {};
/* Implement most of the 4xN functions by forwarding them to other functions when possible.
 * The functions forwarded here don't need to be implemented by each implementation.
 * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
 */
#if GMX_SIMD_HAVE_FLOAT
#    if GMX_SIMD_FLOAT_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
#    elif GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 1
// For GMX_SIMD_FLOAT_WIDTH>4 it is the responsibility of the implementation to set
// GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#    endif

#    if GMX_SIMD_FLOAT_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
//! \brief Width-4 forwarder: returns a SimdFloat constructed from the single value \c *f.
static inline SimdFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
    const float value = *f;
    return SimdFloat(value);
}

//! \brief Width-4 forwarder: forwards to the aligned load<SimdFloat>() on \p f.
static inline SimdFloat gmx_simdcall
load4DuplicateN(const float* f)
{
    return load<SimdFloat>(f);
}

//! \brief Width-4 forwarder: forwards to loadU<SimdFloat>() on \p f; the int argument is ignored.
static inline SimdFloat gmx_simdcall
loadU4NOffset(const float* f, int)
{
    return loadU<SimdFloat>(f);
}
#    elif GMX_SIMD_FLOAT_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT && GMX_SIMD_HAVE_LOADU
//! \brief Width-8 forwarder: forwards to loadU1DualHsimd() on \p f.
static inline SimdFloat gmx_simdcall
loadUNDuplicate4(const float* f)
{
    return loadU1DualHsimd(f);
}

//! \brief Width-8 forwarder: forwards to loadDuplicateHsimd() on \p f.
static inline SimdFloat gmx_simdcall
load4DuplicateN(const float* f)
{
    return loadDuplicateHsimd(f);
}
#    endif
#else  // GMX_SIMD_HAVE_FLOAT
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
#endif
#if GMX_SIMD_HAVE_DOUBLE
#    if GMX_SIMD_DOUBLE_WIDTH < 4 || !GMX_SIMD_HAVE_LOADU
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
#    elif GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
#        define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 1
// For GMX_SIMD_DOUBLE_WIDTH>4 it is the responsibility of the implementation to set
// GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#    endif

#    if GMX_SIMD_DOUBLE_WIDTH == 4 && GMX_SIMD_HAVE_LOADU
//! \brief Width-4 forwarder: returns a SimdDouble constructed from the single value \c *f.
static inline SimdDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
    const double value = *f;
    return SimdDouble(value);
}

//! \brief Width-4 forwarder: forwards to the aligned load<SimdDouble>() on \p f.
static inline SimdDouble gmx_simdcall
load4DuplicateN(const double* f)
{
    return load<SimdDouble>(f);
}

//! \brief Width-4 forwarder: forwards to loadU<SimdDouble>() on \p f; the int argument is ignored.
static inline SimdDouble gmx_simdcall
loadU4NOffset(const double* f, int)
{
    return loadU<SimdDouble>(f);
}
#    elif GMX_SIMD_DOUBLE_WIDTH == 8 && GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE && GMX_SIMD_HAVE_LOADU
//! \brief Width-8 forwarder: forwards to loadU1DualHsimd() on \p f.
static inline SimdDouble gmx_simdcall
loadUNDuplicate4(const double* f)
{
    return loadU1DualHsimd(f);
}

//! \brief Width-8 forwarder: forwards to loadDuplicateHsimd() on \p f.
static inline SimdDouble gmx_simdcall
load4DuplicateN(const double* f)
{
    return loadDuplicateHsimd(f);
}
#    endif
#else  // GMX_SIMD_HAVE_DOUBLE
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
#endif
/* Default-precision alias: GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE if GMX_DOUBLE is
 * nonzero, otherwise GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT.
 */
#if !GMX_DOUBLE
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
#else
#    define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
#endif
667 //! \} end of name-group proxy objects
669 } // namespace gmx
671 // \} end of module_simd
673 //! \endcond end of condition libapi
#if GMX_SIMD_HAVE_FLOAT

/*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
 *
 * \param[in] ptr A pointer to a float
 * \return True if the address in \p ptr is a multiple of
 *         GMX_SIMD_FLOAT_WIDTH*sizeof(float) bytes, otherwise false.
 */
static inline bool isSimdAligned(const float *ptr)
{
    const std::size_t address   = reinterpret_cast<std::size_t>(ptr);
    const std::size_t alignment = GMX_SIMD_FLOAT_WIDTH*sizeof(float);
    return (address % alignment) == 0;
}

#endif // GMX_SIMD_HAVE_FLOAT
#if GMX_SIMD_HAVE_DOUBLE

/*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
 *
 * \param[in] ptr A pointer to a double
 * \return True if the address in \p ptr is a multiple of
 *         GMX_SIMD_DOUBLE_WIDTH*sizeof(double) bytes, otherwise false.
 */
static inline bool isSimdAligned(const double *ptr)
{
    const std::size_t address   = reinterpret_cast<std::size_t>(ptr);
    const std::size_t alignment = GMX_SIMD_DOUBLE_WIDTH*sizeof(double);
    return (address % alignment) == 0;
}

#endif // GMX_SIMD_HAVE_DOUBLE
703 #if GMX_SIMD_HAVE_REAL
704 #if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
705 #error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
706 #endif
707 #endif
710 #if 0
711 /* This is a hack to cover the corner case of using an
712 explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
713 GMX_SIMD_HAVE_REAL.
715 Such code is expected to include simd.h to get those symbols
716 defined, but the actual definitions are in the implementation headers
717 included by simd.h. check-source.py is not a full preprocessor, so
718 it does not see the definitions in the implementation headers as
719 belonging to simd.h, thus it cannot check that simd.h is being used
720 correctly in the above hypothetical corner case. However, the
721 checker also does not parse #if 0, so we can fool the checker into
722 thinking that definition occurs here, and that will work well
723 enough.
725 If there's ever other kinds of SIMD code that might have the same
726 problem, we might want to add other variables here.
728 # define GMX_SIMD_HAVE_FLOAT 1
729 # define GMX_SIMD_HAVE_DOUBLE 1
731 #endif // end of hack
733 // The ArrayRef<SimdReal> specialization is always included, because compiler
734 // errors are confusing when template specialization aren't available.
735 #include "gromacs/simd/simd_memory.h"
737 #endif // GMX_SIMD_SIMD_H