Merge branch 'origin/release-2020' into master
[gromacs.git] / src / gromacs / simd / simd.h
blob537046645a9b2080ae4024bfc6dcadb9e7e794c4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
5 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \libinternal
38 * \defgroup module_simd SIMD intrinsics interface (simd)
39 * \ingroup group_utilitymodules
41 * \brief Provides an architecture-independent way of doing SIMD coding.
43 * Overview of the SIMD implementation is provided in \ref page_simd.
44 * The details are documented in gromacs/simd/simd.h and the reference
45 * implementation impl_reference.h.
47 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
50 #ifndef GMX_SIMD_SIMD_H
51 #define GMX_SIMD_SIMD_H
53 /*! \libinternal \file
55 * \brief Definitions, capabilities, and wrappers for SIMD module.
57 * The macros in this file are intended to be used for writing
58 * architecture-independent SIMD intrinsics code.
59 * To support a new architecture, adding a new sub-include with macros here
60 * should be (nearly) all that is needed.
62 * The defines in this top-level file will set default Gromacs real precision
63 * operations to either single or double precision based on whether
64 * GMX_DOUBLE is 1. The actual implementation - including e.g.
65 * conversion operations specifically between single and double - is documented
66 * in impl_reference.h.
68 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
70 * \inlibraryapi
71 * \ingroup module_simd
74 #include "config.h"
76 #include <cstddef>
77 #include <cstdint>
79 #include <array>
80 #include <type_traits>
82 #include "gromacs/utility/basedefinitions.h"
83 #include "gromacs/utility/classhelpers.h"
84 #include "gromacs/utility/real.h"
86 //! \cond libapi
89 /*! \addtogroup module_simd
90 * \{
93 namespace gmx
95 /*! \libinternal \brief Tag type to select to load SimdFloat with simdLoad(U) */
96 struct SimdFloatTag
99 /*! \libinternal \brief Tag type to select to load SimdDouble with simdLoad(U) */
100 struct SimdDoubleTag
103 /*! \libinternal \brief Tag type to select to load SimdFInt32 with simdLoad(U) */
104 struct SimdFInt32Tag
107 /*! \libinternal \brief Tag type to select to load SimdDInt32 with simdLoad(U) */
108 struct SimdDInt32Tag
111 } // namespace gmx
113 /*! \name SIMD predefined macros to describe high-level capabilities
115 * These macros are used to describe the features available in default
116 * Gromacs real precision. They are set from the lower-level implementation
117 * files that have macros describing single and double precision individually,
118 * as well as the implementation details.
119 * \{
122 #ifdef __clang__
123 # pragma clang diagnostic push
124 /* reinterpret_cast is used for SIMD->scalar conversion
126 * In general using reinterpret_cast for bit_cast is UB but
127 * for intrinsics types it works for all known compilers
128 * and not all compilers produce as good code for memcpy.
130 # pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
131 #endif
133 #if GMX_SIMD_X86_SSE2
134 # include "impl_x86_sse2/impl_x86_sse2.h"
135 #elif GMX_SIMD_X86_SSE4_1
136 # include "impl_x86_sse4_1/impl_x86_sse4_1.h"
137 #elif GMX_SIMD_X86_AVX_128_FMA
138 # include "impl_x86_avx_128_fma/impl_x86_avx_128_fma.h"
139 #elif GMX_SIMD_X86_AVX_256
140 # include "impl_x86_avx_256/impl_x86_avx_256.h"
141 #elif GMX_SIMD_X86_AVX2_256
142 # include "impl_x86_avx2_256/impl_x86_avx2_256.h"
143 #elif GMX_SIMD_X86_AVX2_128
144 # include "impl_x86_avx2_128/impl_x86_avx2_128.h"
145 #elif GMX_SIMD_X86_MIC
146 # include "impl_x86_mic/impl_x86_mic.h"
147 #elif GMX_SIMD_X86_AVX_512
148 # include "impl_x86_avx_512/impl_x86_avx_512.h"
149 #elif GMX_SIMD_X86_AVX_512_KNL
150 # include "impl_x86_avx_512_knl/impl_x86_avx_512_knl.h"
151 #elif GMX_SIMD_ARM_NEON
152 # include "impl_arm_neon/impl_arm_neon.h"
153 #elif GMX_SIMD_ARM_NEON_ASIMD
154 # include "impl_arm_neon_asimd/impl_arm_neon_asimd.h"
155 #elif GMX_SIMD_IBM_VMX
156 # include "impl_ibm_vmx/impl_ibm_vmx.h"
157 #elif GMX_SIMD_IBM_VSX
158 # include "impl_ibm_vsx/impl_ibm_vsx.h"
159 #elif (GMX_SIMD_REFERENCE || defined DOXYGEN)
160 # include "impl_reference/impl_reference.h" // Includes doxygen documentation
161 #else
162 # include "impl_none/impl_none.h"
163 #endif
165 #ifdef __clang__
166 # pragma clang diagnostic pop
167 #endif
169 // The scalar SIMD-mimicking functions are always included so we can use
170 // templated functions even without SIMD support.
171 #include "gromacs/simd/scalar/scalar.h"
172 #include "gromacs/simd/scalar/scalar_math.h"
173 #include "gromacs/simd/scalar/scalar_util.h"
176 #if GMX_DOUBLE
177 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_DOUBLE
178 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_DOUBLE_WIDTH
179 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_DINT32_EXTRACT
180 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_DINT32_LOGICAL
181 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_DINT32_ARITHMETICS
182 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
183 GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE
184 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE
185 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_DOUBLE
186 #else // GMX_DOUBLE
188 /*! \brief 1 if SimdReal is available, otherwise 0.
190 * \ref GMX_SIMD_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_HAVE_FLOAT.
192 # define GMX_SIMD_HAVE_REAL GMX_SIMD_HAVE_FLOAT
194 /*! \brief Width of SimdReal.
196 * \ref GMX_SIMD_DOUBLE_WIDTH if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD_FLOAT_WIDTH.
198 # define GMX_SIMD_REAL_WIDTH GMX_SIMD_FLOAT_WIDTH
200 /*! \brief 1 if support is available for extracting elements from SimdInt32, otherwise 0
202 * \ref GMX_SIMD_HAVE_DINT32_EXTRACT if GMX_DOUBLE is 1, otherwise
203 * \ref GMX_SIMD_HAVE_FINT32_EXTRACT.
205 # define GMX_SIMD_HAVE_INT32_EXTRACT GMX_SIMD_HAVE_FINT32_EXTRACT
207 /*! \brief 1 if logical ops are supported on SimdInt32, otherwise 0.
209 * \ref GMX_SIMD_HAVE_DINT32_LOGICAL if GMX_DOUBLE is 1, otherwise
210 * \ref GMX_SIMD_HAVE_FINT32_LOGICAL.
212 # define GMX_SIMD_HAVE_INT32_LOGICAL GMX_SIMD_HAVE_FINT32_LOGICAL
214 /*! \brief 1 if arithmetic ops are supported on SimdInt32, otherwise 0.
216 * \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS if GMX_DOUBLE is 1, otherwise
217 * \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS.
219 # define GMX_SIMD_HAVE_INT32_ARITHMETICS GMX_SIMD_HAVE_FINT32_ARITHMETICS
221 /*! \brief 1 if gmx::simdGatherLoadUBySimdIntTranspose is present, otherwise 0
223 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_DOUBLE if GMX_DOUBLE is 1, otherwise
224 * \ref GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT.
226 # define GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL \
227 GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_FLOAT
229 /*! \brief 1 if real half-register load/store/reduce utils present, otherwise 0
231 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_DOUBLE if GMX_DOUBLE is 1, otherwise
232 * \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT.
234 # define GMX_SIMD_HAVE_HSIMD_UTIL_REAL GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT
236 /*! \brief 1 if Simd4Real is available, otherwise 0.
238 * \ref GMX_SIMD4_HAVE_DOUBLE if GMX_DOUBLE is 1, otherwise \ref GMX_SIMD4_HAVE_FLOAT.
240 # define GMX_SIMD4_HAVE_REAL GMX_SIMD4_HAVE_FLOAT
242 #endif // GMX_DOUBLE
244 //! \} end of name-group describing high-level capabilities
246 namespace gmx
249 template<class T, size_t N>
250 struct AlignedArray;
252 #if GMX_SIMD_HAVE_FLOAT
253 /*! \libinternal \brief Identical to std::array with GMX_SIMD_FLOAT_WIDTH alignment.
254 * Should not be deleted through base pointer (destructor is non-virtual).
256 template<size_t N>
257 struct alignas(GMX_SIMD_FLOAT_WIDTH * sizeof(float)) AlignedArray<float, N> :
258 public std::array<float, N>
261 #endif
263 #if GMX_SIMD_HAVE_DOUBLE
264 /*! \libinternal \brief Identical to std::array with GMX_SIMD_DOUBLE_WIDTH alignment.
265 * Should not be deleted through base pointer (destructor is non-virtual).
267 template<size_t N>
268 struct alignas(GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) AlignedArray<double, N> :
269 public std::array<double, N>
272 #endif
274 #if GMX_SIMD_HAVE_REAL
276 /*! \name SIMD data types
278 * The actual storage of these types is implementation dependent. The
279 * documentation is generated from the reference implementation, but for
280 * normal usage this will likely not be what you are using.
281 * \{
284 /*! \brief Real precision floating-point SIMD datatype.
286 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
288 * \ref SimdDouble if GMX_DOUBLE is 1, otherwise \ref SimdFloat.
290 * \note This variable cannot be placed inside other structures or classes, since
291 * some compilers (including at least clang-3.7) appear to lose the
292 * alignment. This is likely particularly severe when allocating such
293 * memory on the heap, but it occurs for stack structures too.
295 # if GMX_DOUBLE
296 typedef SimdDouble SimdReal;
297 # else
298 typedef SimdFloat SimdReal;
299 # endif
302 /*! \brief Boolean SIMD type for usage with \ref SimdReal.
304 * This type is only available if \ref GMX_SIMD_HAVE_REAL is 1.
306 * If GMX_DOUBLE is 1, this will be set to \ref SimdDBool
307 * internally, otherwise \ref SimdFBool. This is necessary since some
308 * SIMD implementations use bitpatterns for marking truth, so single-
309 * vs. double precision booleans are not necessarily exchangable.
310 * As long as you just use this type you will not have to worry about precision.
312 * See \ref SimdIBool for an explanation of real vs. integer booleans.
314 * \note This variable cannot be placed inside other structures or classes, since
315 * some compilers (including at least clang-3.7) appear to lose the
316 * alignment. This is likely particularly severe when allocating such
317 * memory on the heap, but it occurs for stack structures too.
319 # if GMX_DOUBLE
320 typedef SimdDBool SimdBool;
321 # else
322 typedef SimdFBool SimdBool;
323 # endif
326 /*! \brief 32-bit integer SIMD type.
328 * If GMX_DOUBLE is 1, this will be set to \ref SimdDInt32
329 * internally, otherwise \ref SimdFInt32. This might seem a strange
330 * implementation detail, but it is because some SIMD implementations use
331 * different types/widths of integers registers when converting from
332 * double vs. single precision floating point. As long as you just use
333 * this type you will not have to worry about precision.
335 * \note This variable cannot be placed inside other structures or classes, since
336 * some compilers (including at least clang-3.7) appear to lose the
337 * alignment. This is likely particularly severe when allocating such
338 * memory on the heap, but it occurs for stack structures too.
340 # if GMX_DOUBLE
341 typedef SimdDInt32 SimdInt32;
342 # else
343 typedef SimdFInt32 SimdInt32;
344 # endif
346 # if GMX_SIMD_HAVE_INT32_ARITHMETICS
347 /*! \brief Boolean SIMD type for usage with \ref SimdInt32.
349 * This type is only available if \ref GMX_SIMD_HAVE_INT32_ARITHMETICS is 1.
351 * If GMX_DOUBLE is 1, this will be set to \ref SimdDIBool
352 * internally, otherwise \ref SimdFIBool. This is necessary since some
353 * SIMD implementations use bitpatterns for marking truth, so single-
354 * vs. double precision booleans are not necessarily exchangable, and while
355 * a double-precision boolean might be represented with a 64-bit mask, the
356 * corresponding integer might only use a 32-bit mask.
358 * We provide conversion routines for these cases, so the only thing you need to
359 * keep in mind is to use \ref SimdBool when working with
360 * \ref SimdReal while you pick \ref SimdIBool when working with
361 * \ref SimdInt32 .
363 * To convert between them, use \ref cvtB2IB and \ref cvtIB2B.
365 * \note This variable cannot be placed inside other structures or classes, since
366 * some compilers (including at least clang-3.7) appear to lose the
367 * alignment. This is likely particularly severe when allocating such
368 * memory on the heap, but it occurs for stack structures too.
370 # if GMX_DOUBLE
371 typedef SimdDIBool SimdIBool;
372 # else
373 typedef SimdFIBool SimdIBool;
374 # endif
375 # endif // GMX_SIMD_HAVE_INT32_ARITHMETICS
378 # if GMX_DOUBLE
379 const int c_simdBestPairAlignment = c_simdBestPairAlignmentDouble;
380 # else
381 const int c_simdBestPairAlignment = c_simdBestPairAlignmentFloat;
382 # endif
384 #endif // GMX_SIMD_HAVE_REAL
386 #if GMX_SIMD4_HAVE_REAL
387 /*! \brief Real precision floating-point SIMD4 datatype.
389 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
391 * \ref Simd4Double if GMX_DOUBLE is 1, otherwise \ref Simd4Float.
393 * \note This variable cannot be placed inside other structures or classes, since
394 * some compilers (including at least clang-3.7) appear to lose the
395 * alignment. This is likely particularly severe when allocating such
396 * memory on the heap, but it occurs for stack structures too.
398 # if GMX_DOUBLE
399 typedef Simd4Double Simd4Real;
400 # else
401 typedef Simd4Float Simd4Real;
402 # endif
405 /*! \brief Boolean SIMD4 type for usage with \ref SimdReal.
407 * This type is only available if \ref GMX_SIMD4_HAVE_REAL is 1.
409 * If GMX_DOUBLE is 1, this will be set to \ref Simd4DBool
410 * internally, otherwise \ref Simd4FBool. This is necessary since some
411 * SIMD implementations use bitpatterns for marking truth, so single-
412 * vs. double precision booleans are not necessarily exchangable.
413 * As long as you just use this type you will not have to worry about precision.
415 * \note This variable cannot be placed inside other structures or classes, since
416 * some compilers (including at least clang-3.7) appear to lose the
417 * alignment. This is likely particularly severe when allocating such
418 * memory on the heap, but it occurs for stack structures too.
420 # if GMX_DOUBLE
421 typedef Simd4DBool Simd4Bool;
422 # else
423 typedef Simd4FBool Simd4Bool;
424 # endif
425 #endif // GMX_SIMD4_HAVE_REAL
427 //! \} end of name-group describing SIMD data types
429 /*! \name High-level SIMD proxy objects to disambiguate load/set operations
430 * \{
433 namespace internal
435 /*! \libinternal \brief Simd traits
437 * These traits are used to query data about SIMD types. Currently provided
438 * data is useful for SIMD loads (load function and helper classes for
439 * ArrayRef<> in simd_memory.h). Provided data:
440 * - type: scalar type corresponding to the SIMD type
441 * - width: SIMD width
442 * - tag: tag used for type dispatch of load function
444 template<typename T>
445 struct SimdTraits
449 #if GMX_SIMD_HAVE_FLOAT
450 template<>
451 struct SimdTraits<SimdFloat>
453 using type = float;
454 static constexpr int width = GMX_SIMD_FLOAT_WIDTH;
455 using tag = SimdFloatTag;
457 #endif
458 #if GMX_SIMD_HAVE_DOUBLE
459 template<>
460 struct SimdTraits<SimdDouble>
462 using type = double;
463 static constexpr int width = GMX_SIMD_DOUBLE_WIDTH;
464 using tag = SimdDoubleTag;
466 #endif
467 #if GMX_SIMD_HAVE_FLOAT
468 template<>
469 struct SimdTraits<SimdFInt32>
471 using type = int;
472 static constexpr int width = GMX_SIMD_FINT32_WIDTH;
473 using tag = SimdFInt32Tag;
475 #endif
476 #if GMX_SIMD_HAVE_DOUBLE
477 template<>
478 struct SimdTraits<SimdDInt32>
480 using type = int;
481 static constexpr int width = GMX_SIMD_DINT32_WIDTH;
482 using tag = SimdDInt32Tag;
484 #endif
486 template<typename T>
487 struct SimdTraits<const T>
489 using type = const typename SimdTraits<T>::type;
490 static constexpr int width = SimdTraits<T>::width;
491 using tag = typename SimdTraits<T>::tag;
493 } // namespace internal
495 /*! \brief Load function that returns SIMD or scalar
497 * Note that a load of T* where T is const returns a value, which is a
498 * copy, and the caller cannot be constrained to not change it, so the
499 * return type uses std::remove_const_t.
501 * \tparam T Type to load (type is always mandatory)
502 * \param m Pointer to aligned memory
503 * \return Loaded value
505 template<typename T>
506 static inline std::remove_const_t<T>
507 load(const typename internal::SimdTraits<T>::type* m) // disabled by SFINAE for non-SIMD types
509 return simdLoad(m, typename internal::SimdTraits<T>::tag());
512 template<typename T>
513 static inline T
514 /* the enable_if serves to prevent two different type of misuse:
515 * 1) load<SimdReal>(SimdReal*); should only be called on real* or int*
516 * 2) load(real*); template parameter is mandatory because otherwise ambiguity is
517 * created. The dependent type disables type deduction.
519 load(const std::enable_if_t<std::is_arithmetic<T>::value, T> *m)
521 return *m;
524 template<typename T, size_t N>
525 static inline T gmx_simdcall load(const AlignedArray<typename internal::SimdTraits<T>::type, N>& m)
527 return simdLoad(m.data(), typename internal::SimdTraits<T>::tag());
530 /*! \brief Load function that returns SIMD or scalar based on template argument
532 * \tparam T Type to load (type is always mandatory)
533 * \param m Pointer to unaligned memory
534 * \return Loaded SimdFloat/Double/Int or basic scalar type
536 template<typename T>
537 static inline T loadU(const typename internal::SimdTraits<T>::type* m)
539 return simdLoadU(m, typename internal::SimdTraits<T>::tag());
542 template<typename T>
543 static inline T loadU(const std::enable_if_t<std::is_arithmetic<T>::value, T>* m)
545 return *m;
548 template<typename T, size_t N>
549 static inline T gmx_simdcall loadU(const AlignedArray<typename internal::SimdTraits<T>::type, N>& m)
551 return simdLoadU(m.data(), typename internal::SimdTraits<T>::tag());
554 /*! \libinternal \brief Proxy object to enable setZero() for SIMD and real types.
556 * This object is returned by setZero(), and depending on what type you assign
557 * the result to the conversion method will call the right low-level function.
559 class SimdSetZeroProxy
561 public:
562 //!\brief Conversion method that returns 0.0 as float
563 operator float() const { return 0.0F; }
564 //!\brief Conversion method that returns 0.0 as double
565 operator double() const { return 0.0; }
566 //!\brief Conversion method that returns 0.0 as int32
567 operator std::int32_t() const { return 0; }
568 #if GMX_SIMD_HAVE_FLOAT
569 //!\brief Conversion method that will execute setZero() for SimdFloat
570 operator SimdFloat() const { return setZeroF(); }
571 //!\brief Conversion method that will execute setZero() for SimdFInt32
572 operator SimdFInt32() const { return setZeroFI(); }
573 #endif
574 #if GMX_SIMD4_HAVE_FLOAT
575 //!\brief Conversion method that will execute setZero() for Simd4Float
576 operator Simd4Float() const { return simd4SetZeroF(); }
577 #endif
578 #if GMX_SIMD_HAVE_DOUBLE
579 //!\brief Conversion method that will execute setZero() for SimdDouble
580 operator SimdDouble() const { return setZeroD(); }
581 //!\brief Conversion method that will execute setZero() for SimdDInt32
582 operator SimdDInt32() const { return setZeroDI(); }
583 #endif
584 #if GMX_SIMD4_HAVE_DOUBLE
585 //!\brief Conversion method that will execute setZero() for Simd4Double
586 operator Simd4Double() const { return simd4SetZeroD(); }
587 #endif
590 /*! \brief Helper function to set any SIMD or scalar variable to zero
592 * \return Proxy object that will call the actual function to set a SIMD/scalar
593 * variable to zero based on the conversion function called when you
594 * assign the result.
596 static inline SimdSetZeroProxy gmx_simdcall setZero()
598 return {};
601 namespace internal
603 // TODO: Don't foward function but properly rename them and use proper traits
604 template<typename T>
605 struct Simd4Traits
609 #if GMX_SIMD4_HAVE_FLOAT
610 template<>
611 struct Simd4Traits<Simd4Float>
613 using type = float;
615 #endif
617 #if GMX_SIMD4_HAVE_DOUBLE
618 template<>
619 struct Simd4Traits<Simd4Double>
621 using type = double;
623 #endif
624 } // namespace internal
626 #if GMX_SIMD4_HAVE_REAL
627 template<typename T>
628 T load(const typename internal::Simd4Traits<T>::type* m)
630 return load4(m);
632 template<typename T>
633 T loadU(const typename internal::Simd4Traits<T>::type* m)
635 return load4U(m);
637 #endif
639 /* Implement most of 4xn functions by forwarding them to other functions when possible.
640 * The functions forwarded here don't need to be implemented by each implementation.
641 * For width=4 all functions are forwarded and for width=8 all but loadU4NOffset are forwarded.
643 #if GMX_SIMD_HAVE_FLOAT
644 # if GMX_SIMD_FLOAT_WIDTH < 4
645 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_FLOAT)
646 # elif GMX_SIMD_FLOAT_WIDTH == 4
647 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT GMX_SIMD_HAVE_LOADU
648 // For GMX_SIMD_FLOAT_WIDTH>4 it is the reponsibility of the implementation to set
649 // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
650 # endif
652 # if GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
653 # if GMX_SIMD_FLOAT_WIDTH < 4
654 using Simd4NFloat = Simd4Float;
655 # define GMX_SIMD4N_FLOAT_WIDTH 4
656 # else
657 using Simd4NFloat = SimdFloat;
658 # define GMX_SIMD4N_FLOAT_WIDTH GMX_SIMD_FLOAT_WIDTH
659 # endif
661 # if GMX_SIMD_FLOAT_WIDTH <= 4
662 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
664 return Simd4NFloat(*f);
666 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
668 return load<Simd4NFloat>(f);
670 static inline Simd4NFloat gmx_simdcall loadU4NOffset(const float* f, int)
672 return loadU<Simd4NFloat>(f);
674 # elif GMX_SIMD_FLOAT_WIDTH == 8
675 static inline Simd4NFloat gmx_simdcall loadUNDuplicate4(const float* f)
677 return loadU1DualHsimd(f);
679 static inline Simd4NFloat gmx_simdcall load4DuplicateN(const float* f)
681 return loadDuplicateHsimd(f);
683 # endif
684 # endif // GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
685 #else // GMX_SIMD_HAVE_FLOAT
686 # define GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT 0
687 #endif
689 #if GMX_SIMD_HAVE_DOUBLE
690 # if GMX_SIMD_DOUBLE_WIDTH < 4
691 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE (GMX_SIMD_HAVE_LOADU && GMX_SIMD4_HAVE_DOUBLE)
692 # elif GMX_SIMD_DOUBLE_WIDTH == 4
693 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE GMX_SIMD_HAVE_LOADU
694 // For GMX_SIMD_DOUBLE_WIDTH>4 it is the reponsibility of the implementation to set
695 // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
696 # endif
698 # if GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
699 # if GMX_SIMD_DOUBLE_WIDTH < 4
700 using Simd4NDouble = Simd4Double;
701 # define GMX_SIMD4N_DOUBLE_WIDTH 4
702 # else
703 using Simd4NDouble = SimdDouble;
704 # define GMX_SIMD4N_DOUBLE_WIDTH GMX_SIMD_DOUBLE_WIDTH
705 # endif
707 # if GMX_SIMD_DOUBLE_WIDTH <= 4
708 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
710 return Simd4NDouble(*f);
712 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
714 return load<Simd4NDouble>(f);
716 static inline Simd4NDouble gmx_simdcall loadU4NOffset(const double* f, int /*unused*/)
718 return loadU<Simd4NDouble>(f);
720 # elif GMX_SIMD_DOUBLE_WIDTH == 8
721 static inline Simd4NDouble gmx_simdcall loadUNDuplicate4(const double* f)
723 return loadU1DualHsimd(f);
725 static inline Simd4NDouble gmx_simdcall load4DuplicateN(const double* f)
727 return loadDuplicateHsimd(f);
729 # endif
730 # endif // GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
731 #else // GMX_SIMD_HAVE_DOUBLE
732 # define GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE 0
733 #endif
735 #if GMX_DOUBLE
736 # define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_DOUBLE
737 #else
738 # define GMX_SIMD_HAVE_4NSIMD_UTIL_REAL GMX_SIMD_HAVE_4NSIMD_UTIL_FLOAT
739 #endif
741 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
742 # if GMX_DOUBLE
743 using Simd4NReal = Simd4NDouble;
744 # define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_DOUBLE_WIDTH
745 # else
746 using Simd4NReal = Simd4NFloat;
747 # define GMX_SIMD4N_REAL_WIDTH GMX_SIMD4N_FLOAT_WIDTH
748 # endif
749 #endif
751 //! \} end of name-group proxy objects
753 } // namespace gmx
755 // \} end of module_simd
757 //! \endcond end of condition libapi
760 #if GMX_SIMD_HAVE_FLOAT
762 /*! \brief Returns whether a pointer to float is aligned to a SIMD boundary
764 * \param[in] ptr A pointer to a float
766 static inline bool isSimdAligned(const float* ptr)
768 return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_FLOAT_WIDTH * sizeof(float)) == 0;
771 #endif // GMX_SIMD_HAVE_FLOAT
773 #if GMX_SIMD_HAVE_DOUBLE
775 /*! \brief Returns whether a pointer to double is aligned to a SIMD boundary
777 * \param[in] ptr A pointer to a double
779 static inline bool isSimdAligned(const double* ptr)
781 return reinterpret_cast<std::size_t>(ptr) % (GMX_SIMD_DOUBLE_WIDTH * sizeof(double)) == 0;
784 #endif // GMX_SIMD_HAVE_DOUBLE
787 #if GMX_SIMD_HAVE_REAL
788 # if GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH
789 # error "GMX_SIMD_REAL_WIDTH > GMX_REAL_MAX_SIMD_WIDTH: increase GMX_REAL_MAX_SIMD_WIDTH in real.h"
790 # endif
791 #endif
794 #if 0
795 /* This is a hack to cover the corner case of using an
796 explicit GMX_SIMD_HAVE_FLOAT or GMX_SIMD_HAVE_DOUBLE, rather than
797 GMX_SIMD_HAVE_REAL.
799 Such code is expected to include simd.h to get those symbols
800 defined, but the actual definitions are in the implemention headers
801 included by simd.h. check-source.py is not a full preprocessor, so
802 it does not see the definitions in the implementation headers as
803 belonging to simd.h, thus it cannot check that simd.h is being used
804 correctly in the above hypothetical corner case. However, the
805 checker also does not parse #if 0, so we can fool the checker into
806 thinking that definition occurs here, and that will work well
807 enough.
809 If there's ever other kinds of SIMD code that might have the same
810 problem, we might want to add other variables here.
812 # define GMX_SIMD_HAVE_FLOAT 1
813 # define GMX_SIMD_HAVE_DOUBLE 1
815 #endif // end of hack
817 // The ArrayRef<SimdReal> specialization is always included, because compiler
818 // errors are confusing when template specialization aren't available.
819 #include "gromacs/simd/simd_memory.h"
821 #endif // GMX_SIMD_SIMD_H