Improve accuracy of SIMD exp for small args
[gromacs.git] / src / gromacs / simd / impl_reference / impl_reference_simd_double.h
blob8b953402b3e7ca1dea00b86abc5d1b006eada05a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
48 #include "config.h"
50 #include <cassert>
51 #include <cmath>
52 #include <cstddef>
53 #include <cstdint>
55 #include <algorithm>
56 #include <array>
58 #include "gromacs/math/utilities.h"
59 #include "gromacs/utility/fatalerror.h"
61 #include "impl_reference_definitions.h"
62 #include "impl_reference_simd_float.h"
64 namespace gmx
67 /*! \cond libapi */
68 /*! \addtogroup module_simd */
69 /*! \{ */
71 /* \name SIMD implementation data types
72 * \{
75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
77 * \note This variable cannot be placed inside other structures or classes, since
78 * some compilers (including at least clang-3.7) appear to lose the
79 * alignment. This is likely particularly severe when allocating such
80 * memory on the heap, but it occurs for stack structures too.
82 class SimdDouble
84 public:
85 SimdDouble() {}
87 //! \brief Construct from scalar
88 SimdDouble(double d) { simdInternal_.fill(d); }
90 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
92 * This has to be public to enable usage in combination with static inline
93 * functions, but it should never, EVER, be accessed by any code outside
94 * the corresponding implementation directory since the type will depend
95 * on the architecture.
97 std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
102 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
104 * \note The integer SIMD type will always be available, but on architectures
105 * that do not have any real integer SIMD support it might be defined as the
106 * floating-point type. This will work fine, since there are separate defines
107 * for whether the implementation can actually do any operations on integer
108 * SIMD types.
110 * \note This variable cannot be placed inside other structures or classes, since
111 * some compilers (including at least clang-3.7) appear to lose the
112 * alignment. This is likely particularly severe when allocating such
113 * memory on the heap, but it occurs for stack structures too.
115 class SimdDInt32
117 public:
118 SimdDInt32() {}
120 //! \brief Construct from scalar
121 SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
123 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
125 * This has to be public to enable usage in combination with static inline
126 * functions, but it should never, EVER, be accessed by any code outside
127 * the corresponding implementation directory since the type will depend
128 * on the architecture.
130 std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
133 /*! \libinternal \brief Boolean type for double SIMD data.
135 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
137 * \note This variable cannot be placed inside other structures or classes, since
138 * some compilers (including at least clang-3.7) appear to lose the
139 * alignment. This is likely particularly severe when allocating such
140 * memory on the heap, but it occurs for stack structures too.
142 class SimdDBool
144 public:
145 SimdDBool() {}
147 //! \brief Construct from scalar bool
148 SimdDBool(bool b) { simdInternal_.fill(b); }
150 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
152 * This has to be public to enable usage in combination with static inline
153 * functions, but it should never, EVER, be accessed by any code outside
154 * the corresponding implementation directory since the type will depend
155 * on the architecture.
157 std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
162 * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
164 * \note This variable cannot be placed inside other structures or classes, since
165 * some compilers (including at least clang-3.7) appear to lose the
166 * alignment. This is likely particularly severe when allocating such
167 * memory on the heap, but it occurs for stack structures too.
169 class SimdDIBool
171 public:
172 SimdDIBool() {}
174 //! \brief Construct from scalar
175 SimdDIBool(bool b) { simdInternal_.fill(b); }
177 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
179 * This has to be public to enable usage in combination with static inline
180 * functions, but it should never, EVER, be accessed by any code outside
181 * the corresponding implementation directory since the type will depend
182 * on the architecture.
184 std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
187 /*! \}
189 * \name SIMD implementation load/store operations for double precision floating point
190 * \{
193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
195 * \param m Pointer to memory aligned to the SIMD width.
196 * \return SIMD variable with data loaded.
198 static inline SimdDouble gmx_simdcall
199 simdLoad(const double *m)
201 SimdDouble a;
203 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
205 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
206 return a;
209 /*! \brief Store the contents of SIMD double variable to aligned memory m.
211 * \param[out] m Pointer to memory, aligned to SIMD width.
212 * \param a SIMD variable to store
214 static inline void gmx_simdcall
215 store(double *m, SimdDouble a)
217 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
219 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
222 /*! \brief Load SIMD double from unaligned memory.
224 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
226 * \param m Pointer to memory, no alignment requirement.
227 * \return SIMD variable with data loaded.
229 static inline SimdDouble gmx_simdcall
230 simdLoadU(const double *m)
232 SimdDouble a;
233 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
234 return a;
237 /*! \brief Store SIMD double to unaligned memory.
239 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
241 * \param[out] m Pointer to memory, no alignment requirement.
242 * \param a SIMD variable to store.
244 static inline void gmx_simdcall
245 storeU(double *m, SimdDouble a)
247 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
250 /*! \brief Set all SIMD double variable elements to 0.0.
252 * You should typically just call \ref gmx::setZero(), which uses proxy objects
253 * internally to handle all types rather than adding the suffix used here.
255 * \return SIMD 0.0
257 static inline SimdDouble gmx_simdcall
258 setZeroD()
260 return SimdDouble(0.0);
263 /*! \}
265 * \name SIMD implementation load/store operations for integers (corresponding to double)
266 * \{
269 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
271 * You should typically just call \ref gmx::load(), which uses proxy objects
272 * internally to handle all types rather than adding the suffix used here.
274 * \param m Pointer to memory, aligned to (double) integer SIMD width.
275 * \return SIMD integer variable.
277 static inline SimdDInt32 gmx_simdcall
278 simdLoadDI(const std::int32_t * m)
280 SimdDInt32 a;
282 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
284 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
285 return a;
288 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
290 * \param m Memory aligned to (double) integer SIMD width.
291 * \param a SIMD (double) integer variable to store.
293 static inline void gmx_simdcall
294 store(std::int32_t * m, SimdDInt32 a)
296 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
298 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
301 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
303 * You should typically just call \ref gmx::loadU(), which uses proxy objects
304 * internally to handle all types rather than adding the suffix used here.
306 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
308 * \param m Pointer to memory, no alignment requirements.
309 * \return SIMD integer variable.
311 static inline SimdDInt32 gmx_simdcall
312 simdLoadUDI(const std::int32_t *m)
314 SimdDInt32 a;
315 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
316 return a;
319 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
321 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
323 * \param m Memory pointer, no alignment requirements.
324 * \param a SIMD (double) integer variable to store.
326 static inline void gmx_simdcall
327 storeU(std::int32_t * m, SimdDInt32 a)
329 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
332 /*! \brief Set all SIMD (double) integer variable elements to 0.
334 * You should typically just call \ref gmx::setZero(), which uses proxy objects
335 * internally to handle all types rather than adding the suffix used here.
337 * \return SIMD 0
339 static inline SimdDInt32 gmx_simdcall
340 setZeroDI()
342 return SimdDInt32(0);
345 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
347 * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
349 * \tparam index Compile-time constant, position to extract (first position is 0)
350 * \param a SIMD variable from which to extract value.
351 * \return Single integer from position index in SIMD variable.
353 template<int index>
354 static inline std::int32_t gmx_simdcall
355 extract(SimdDInt32 a)
357 return a.simdInternal_[index];
360 /*! \}
362 * \name SIMD implementation double precision floating-point bitwise logical operations
363 * \{
366 /*! \brief Bitwise and for two SIMD double variables.
368 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
370 * \param a data1
371 * \param b data2
372 * \return data1 & data2
374 static inline SimdDouble gmx_simdcall
375 operator&(SimdDouble a, SimdDouble b)
377 SimdDouble res;
379 union
381 double r;
382 std::int64_t i;
384 conv1, conv2;
386 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
388 conv1.r = a.simdInternal_[i];
389 conv2.r = b.simdInternal_[i];
390 conv1.i = conv1.i & conv2.i;
391 res.simdInternal_[i] = conv1.r;
393 return res;
396 /*! \brief Bitwise andnot for SIMD double.
398 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
400 * \param a data1
401 * \param b data2
402 * \return (~data1) & data2
404 static inline SimdDouble gmx_simdcall
405 andNot(SimdDouble a, SimdDouble b)
407 SimdDouble res;
409 union
411 double r;
412 std::int64_t i;
414 conv1, conv2;
416 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
418 conv1.r = a.simdInternal_[i];
419 conv2.r = b.simdInternal_[i];
420 conv1.i = ~conv1.i & conv2.i;
421 res.simdInternal_[i] = conv1.r;
423 return res;
426 /*! \brief Bitwise or for SIMD double.
428 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
430 * \param a data1
431 * \param b data2
432 * \return data1 | data2
434 static inline SimdDouble gmx_simdcall
435 operator|(SimdDouble a, SimdDouble b)
437 SimdDouble res;
439 union
441 double r;
442 std::int64_t i;
444 conv1, conv2;
446 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
448 conv1.r = a.simdInternal_[i];
449 conv2.r = b.simdInternal_[i];
450 conv1.i = conv1.i | conv2.i;
451 res.simdInternal_[i] = conv1.r;
453 return res;
456 /*! \brief Bitwise xor for SIMD double.
458 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
460 * \param a data1
461 * \param b data2
462 * \return data1 ^ data2
464 static inline SimdDouble gmx_simdcall
465 operator^(SimdDouble a, SimdDouble b)
467 SimdDouble res;
469 union
471 double r;
472 std::int64_t i;
474 conv1, conv2;
476 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
478 conv1.r = a.simdInternal_[i];
479 conv2.r = b.simdInternal_[i];
480 conv1.i = conv1.i ^ conv2.i;
481 res.simdInternal_[i] = conv1.r;
483 return res;
486 /*! \}
488 * \name SIMD implementation double precision floating-point arithmetics
489 * \{
492 /*! \brief Add two double SIMD variables.
494 * \param a term1
495 * \param b term2
496 * \return a+b
498 static inline SimdDouble gmx_simdcall
499 operator+(SimdDouble a, SimdDouble b)
501 SimdDouble res;
503 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
505 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
507 return res;
510 /*! \brief Subtract two double SIMD variables.
512 * \param a term1
513 * \param b term2
514 * \return a-b
516 static inline SimdDouble gmx_simdcall
517 operator-(SimdDouble a, SimdDouble b)
519 SimdDouble res;
521 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
523 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
525 return res;
528 /*! \brief SIMD double precision negate.
530 * \param a SIMD double precision value
531 * \return -a
533 static inline SimdDouble gmx_simdcall
534 operator-(SimdDouble a)
536 SimdDouble res;
538 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
540 res.simdInternal_[i] = -a.simdInternal_[i];
542 return res;
545 /*! \brief Multiply two double SIMD variables.
547 * \param a factor1
548 * \param b factor2
549 * \return a*b.
551 static inline SimdDouble gmx_simdcall
552 operator*(SimdDouble a, SimdDouble b)
554 SimdDouble res;
556 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
558 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
560 return res;
563 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
565 * \param a factor1
566 * \param b factor2
567 * \param c term
568 * \return a*b+c
570 static inline SimdDouble gmx_simdcall
571 fma(SimdDouble a, SimdDouble b, SimdDouble c)
573 return a*b+c;
576 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
578 * \param a factor1
579 * \param b factor2
580 * \param c term
581 * \return a*b-c
583 static inline SimdDouble gmx_simdcall
584 fms(SimdDouble a, SimdDouble b, SimdDouble c)
586 return a*b-c;
589 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
591 * \param a factor1
592 * \param b factor2
593 * \param c term
594 * \return -a*b+c
596 static inline SimdDouble gmx_simdcall
597 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
599 return c-a*b;
602 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
604 * \param a factor1
605 * \param b factor2
606 * \param c term
607 * \return -a*b-c
609 static inline SimdDouble gmx_simdcall
610 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
612 return -a*b-c;
615 /*! \brief double SIMD 1.0/sqrt(x) lookup.
617 * This is a low-level instruction that should only be called from routines
618 * implementing the inverse square root in simd_math.h.
620 * \param x Argument, x>0
621 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
623 static inline SimdDouble gmx_simdcall
624 rsqrt(SimdDouble x)
626 SimdDouble res;
628 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
630 // sic - we only use single precision for the lookup
631 res.simdInternal_[i] = 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i]));
633 return res;
636 /*! \brief SIMD double 1.0/x lookup.
638 * This is a low-level instruction that should only be called from routines
639 * implementing the reciprocal in simd_math.h.
641 * \param x Argument, x!=0
642 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
644 static inline SimdDouble gmx_simdcall
645 rcp(SimdDouble x)
647 SimdDouble res;
649 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
651 // sic - we only use single precision for the lookup
652 res.simdInternal_[i] = 1.0f / static_cast<float>(x.simdInternal_[i]);
654 return res;
657 /*! \brief Add two double SIMD variables, masked version.
659 * \param a term1
660 * \param b term2
661 * \param m mask
662 * \return a+b where mask is true, 0.0 otherwise.
664 static inline SimdDouble gmx_simdcall
665 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
667 SimdDouble res;
669 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
671 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
673 return res;
676 /*! \brief Multiply two double SIMD variables, masked version.
678 * \param a factor1
679 * \param b factor2
680 * \param m mask
681 * \return a*b where mask is true, 0.0 otherwise.
683 static inline SimdDouble gmx_simdcall
684 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
686 SimdDouble res;
688 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
690 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
692 return res;
695 /*! \brief SIMD double fused multiply-add, masked version.
697 * \param a factor1
698 * \param b factor2
699 * \param c term
700 * \param m mask
701 * \return a*b+c where mask is true, 0.0 otherwise.
703 static inline SimdDouble gmx_simdcall
704 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
706 SimdDouble res;
708 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
710 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
712 return res;
715 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
717 * This is a low-level instruction that should only be called from routines
718 * implementing the inverse square root in simd_math.h.
720 * \param x Argument, x>0 for entries where mask is true.
721 * \param m Mask
722 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
723 * The result for masked-out entries will be 0.0.
725 static inline SimdDouble gmx_simdcall
726 maskzRsqrt(SimdDouble x, SimdDBool m)
728 SimdDouble res;
730 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
732 // sic - we only use single precision for the lookup
733 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
735 return res;
738 /*! \brief SIMD double 1.0/x lookup, masked version.
740 * This is a low-level instruction that should only be called from routines
741 * implementing the reciprocal in simd_math.h.
743 * \param x Argument, x>0 for entries where mask is true.
744 * \param m Mask
745 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
746 * The result for masked-out entries will be 0.0.
748 static inline SimdDouble gmx_simdcall
749 maskzRcp(SimdDouble x, SimdDBool m)
751 SimdDouble res;
753 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
755 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / static_cast<float>(x.simdInternal_[i]) : 0.0;
757 return res;
760 /*! \brief SIMD double floating-point fabs().
762 * \param a any floating point values
763 * \return fabs(a) for each element.
765 static inline SimdDouble gmx_simdcall
766 abs(SimdDouble a)
768 SimdDouble res;
770 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
772 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
774 return res;
777 /*! \brief Set each SIMD double element to the largest from two variables.
779 * \param a Any floating-point value
780 * \param b Any floating-point value
781 * \return max(a,b) for each element.
783 static inline SimdDouble gmx_simdcall
784 max(SimdDouble a, SimdDouble b)
786 SimdDouble res;
788 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
790 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
792 return res;
795 /*! \brief Set each SIMD double element to the smallest from two variables.
797 * \param a Any floating-point value
798 * \param b Any floating-point value
799 * \return min(a,b) for each element.
801 static inline SimdDouble gmx_simdcall
802 min(SimdDouble a, SimdDouble b)
804 SimdDouble res;
806 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
808 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
810 return res;
813 /*! \brief SIMD double round to nearest integer value (in floating-point format).
815 * \param a Any floating-point value
816 * \return The nearest integer, represented in floating-point format.
818 static inline SimdDouble gmx_simdcall
819 round(SimdDouble a)
821 SimdDouble res;
823 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
825 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
827 return res;
830 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
832 * \param a Any floating-point value
833 * \return Integer rounded towards zero, represented in floating-point format.
835 * \note This is truncation towards zero, not floor(). The reason for this
836 * is that truncation is virtually always present as a dedicated hardware
837 * instruction, but floor() frequently isn't.
839 static inline SimdDouble gmx_simdcall
840 trunc(SimdDouble a)
842 SimdDouble res;
844 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
846 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
848 return res;
851 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
853 * \param value Floating-point value to extract from
854 * \param[out] exponent Returned exponent of value, integer SIMD format.
855 * \return Fraction of value, floating-point SIMD format.
857 static inline SimdDouble gmx_simdcall
858 frexp(SimdDouble value, SimdDInt32 * exponent)
860 SimdDouble fraction;
862 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
864 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
866 return fraction;
869 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
871 * \tparam opt By default, this routine will return zero for input arguments
872 * that are so small they cannot be reproduced in the current
873 * precision. If the unsafe math optimization template parameter
874 * setting is used, these tests are skipped, and the result will
875 * be undefined (possible even NaN). This might happen below -127
876 * in single precision or -1023 in double, although some
877 * might use denormal support to extend the range.
879 * \param value Floating-point number to multiply with new exponent
880 * \param exponent Integer that will not overflow as 2^exponent.
881 * \return value*2^exponent
883 template <MathOptimization opt = MathOptimization::Safe>
884 static inline SimdDouble gmx_simdcall
885 ldexp(SimdDouble value, SimdDInt32 exponent)
887 SimdDouble res;
889 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
891 // std::ldexp already takes care of clamping arguments, so we do not
892 // need to do anything in the reference implementation
893 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
895 return res;
898 /*! \brief Return sum of all elements in SIMD double variable.
900 * \param a SIMD variable to reduce/sum.
901 * \return The sum of all elements in the argument variable.
904 static inline double gmx_simdcall
905 reduce(SimdDouble a)
907 double sum = 0.0;
909 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
911 sum += a.simdInternal_[i];
913 return sum;
916 /*! \}
918 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
919 * \{
922 /*! \brief SIMD a==b for double SIMD.
924 * \param a value1
925 * \param b value2
926 * \return Each element of the boolean will be set to true if a==b.
928 * Beware that exact floating-point comparisons are difficult.
930 static inline SimdDBool gmx_simdcall
931 operator==(SimdDouble a, SimdDouble b)
933 SimdDBool res;
935 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
937 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
939 return res;
942 /*! \brief SIMD a!=b for double SIMD.
944 * \param a value1
945 * \param b value2
946 * \return Each element of the boolean will be set to true if a!=b.
948 * Beware that exact floating-point comparisons are difficult.
950 static inline SimdDBool gmx_simdcall
951 operator!=(SimdDouble a, SimdDouble b)
953 SimdDBool res;
955 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
957 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
959 return res;
962 /*! \brief SIMD a<b for double SIMD.
964 * \param a value1
965 * \param b value2
966 * \return Each element of the boolean will be set to true if a<b.
968 static inline SimdDBool gmx_simdcall
969 operator<(SimdDouble a, SimdDouble b)
971 SimdDBool res;
973 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
975 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
977 return res;
980 /*! \brief SIMD a<=b for double SIMD.
982 * \param a value1
983 * \param b value2
984 * \return Each element of the boolean will be set to true if a<=b.
986 static inline SimdDBool gmx_simdcall
987 operator<=(SimdDouble a, SimdDouble b)
989 SimdDBool res;
991 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
993 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
995 return res;
998 /*! \brief Return true if any bits are set in the single precision SIMD.
1000 * This function is used to handle bitmasks, mainly for exclusions in the
1001 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
1002 * so it is not identical to not-equal.
1004 * \param a value
1005 * \return Each element of the boolean will be true if any bit in a is nonzero.
1007 static inline SimdDBool gmx_simdcall
1008 testBits(SimdDouble a)
1010 SimdDBool res;
1012 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1014 union
1016 std::uint64_t i;
1017 double d;
1018 } conv;
1020 conv.d = a.simdInternal_[i];
1021 res.simdInternal_[i] = (conv.i != 0);
1023 return res;
1026 /*! \brief Logical \a and on double precision SIMD booleans.
1028 * \param a logical vars 1
1029 * \param b logical vars 2
1030 * \return For each element, the result boolean is true if a \& b are true.
1032 * \note This is not necessarily a bitwise operation - the storage format
1033 * of booleans is implementation-dependent.
1035 static inline SimdDBool gmx_simdcall
1036 operator&&(SimdDBool a, SimdDBool b)
1038 SimdDBool res;
1040 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1042 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1044 return res;
1047 /*! \brief Logical \a or on double precision SIMD booleans.
1049 * \param a logical vars 1
1050 * \param b logical vars 2
1051 * \return For each element, the result boolean is true if a or b is true.
1053 * Note that this is not necessarily a bitwise operation - the storage format
1054 * of booleans is implementation-dependent.
1056 \ */
1057 static inline SimdDBool gmx_simdcall
1058 operator||(SimdDBool a, SimdDBool b)
1060 SimdDBool res;
1062 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1064 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1066 return res;
1069 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1071 * \param a Logical variable.
1072 * \return true if any element in a is true, otherwise false.
1074 * The actual return value for truth will depend on the architecture,
1075 * so any non-zero value is considered truth.
1077 static inline bool gmx_simdcall
1078 anyTrue(SimdDBool a)
1080 bool res = false;
1082 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1084 res = res || a.simdInternal_[i];
1086 return res;
1089 /*! \brief Select from double precision SIMD variable where boolean is true.
1091 * \param a Floating-point variable to select from
1092 * \param mask Boolean selector
1093 * \return For each element, a is selected for true, 0 for false.
1095 static inline SimdDouble gmx_simdcall
1096 selectByMask(SimdDouble a, SimdDBool mask)
1098 SimdDouble res;
1100 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1102 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1104 return res;
1107 /*! \brief Select from double precision SIMD variable where boolean is false.
1109 * \param a Floating-point variable to select from
1110 * \param mask Boolean selector
1111 * \return For each element, a is selected for false, 0 for true (sic).
1113 static inline SimdDouble gmx_simdcall
1114 selectByNotMask(SimdDouble a, SimdDBool mask)
1116 SimdDouble res;
1118 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1120 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1122 return res;
1125 /*! \brief Vector-blend SIMD double selection.
1127 * \param a First source
1128 * \param b Second source
1129 * \param sel Boolean selector
1130 * \return For each element, select b if sel is true, a otherwise.
1132 static inline SimdDouble gmx_simdcall
1133 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1135 SimdDouble res;
1137 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1139 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1141 return res;
1144 /*! \}
1146 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1147 * \{
1150 /*! \brief SIMD integer shift left logical, based on immediate value.
1152 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1154 * Logical shift. Each element is shifted (independently) up to 32 positions
1155 * left, while zeros are shifted in from the right.
1157 * \param a integer data to shift
1158 * \param n number of positions to shift left. n<=32.
1159 * \return shifted values
1161 static inline SimdDInt32 gmx_simdcall
1162 operator<<(SimdDInt32 a, int n)
1164 SimdDInt32 res;
1166 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1168 res.simdInternal_[i] = a.simdInternal_[i] << n;
1170 return res;
1173 /*! \brief SIMD integer shift right logical, based on immediate value.
1175 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1177 * Logical shift. Each element is shifted (independently) up to 32 positions
1178 * right, while zeros are shifted in from the left.
1180 * \param a integer data to shift
1181 * \param n number of positions to shift right. n<=32.
1182 * \return shifted values
1184 static inline SimdDInt32 gmx_simdcall
1185 operator>>(SimdDInt32 a, int n)
1187 SimdDInt32 res;
1189 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1191 res.simdInternal_[i] = a.simdInternal_[i] >> n;
1193 return res;
1196 /*! \brief Integer SIMD bitwise and.
1198 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1200 * \note You can \a not use this operation directly to select based on a boolean
1201 * SIMD variable, since booleans are separate from integer SIMD. If that
1202 * is what you need, have a look at \ref gmx::selectByMask instead.
1204 * \param a first integer SIMD
1205 * \param b second integer SIMD
1206 * \return a \& b (bitwise and)
1208 static inline SimdDInt32 gmx_simdcall
1209 operator&(SimdDInt32 a, SimdDInt32 b)
1211 SimdDInt32 res;
1213 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1215 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1217 return res;
1220 /*! \brief Integer SIMD bitwise not/complement.
1222 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1224 * \note You can \a not use this operation directly to select based on a boolean
1225 * SIMD variable, since booleans are separate from integer SIMD. If that
1226 * is what you need, have a look at \ref gmx::selectByMask instead.
1228 * \param a integer SIMD
1229 * \param b integer SIMD
1230 * \return (~a) & b
1232 static inline SimdDInt32 gmx_simdcall
1233 andNot(SimdDInt32 a, SimdDInt32 b)
1235 SimdDInt32 res;
1237 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1239 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1241 return res;
1244 /*! \brief Integer SIMD bitwise or.
1246 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1248 * \param a first integer SIMD
1249 * \param b second integer SIMD
1250 * \return a \| b (bitwise or)
1252 static inline SimdDInt32 gmx_simdcall
1253 operator|(SimdDInt32 a, SimdDInt32 b)
1255 SimdDInt32 res;
1257 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1259 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1261 return res;
1264 /*! \brief Integer SIMD bitwise xor.
1266 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1268 * \param a first integer SIMD
1269 * \param b second integer SIMD
1270 * \return a ^ b (bitwise xor)
1272 static inline SimdDInt32 gmx_simdcall
1273 operator^(SimdDInt32 a, SimdDInt32 b)
1275 SimdDInt32 res;
1277 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1279 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1281 return res;
1284 /*! \}
1286 * \name SIMD implementation integer (corresponding to double) arithmetics
1287 * \{
1290 /*! \brief Add SIMD integers.
1292 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1294 * \param a term1
1295 * \param b term2
1296 * \return a+b
1298 static inline SimdDInt32 gmx_simdcall
1299 operator+(SimdDInt32 a, SimdDInt32 b)
1301 SimdDInt32 res;
1303 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1305 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1307 return res;
1310 /*! \brief Subtract SIMD integers.
1312 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1314 * \param a term1
1315 * \param b term2
1316 * \return a-b
1318 static inline SimdDInt32 gmx_simdcall
1319 operator-(SimdDInt32 a, SimdDInt32 b)
1321 SimdDInt32 res;
1323 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1325 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1327 return res;
1330 /*! \brief Multiply SIMD integers.
1332 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1334 * \param a factor1
1335 * \param b factor2
1336 * \return a*b.
1338 * \note Only the low 32 bits are retained, so this can overflow.
1340 static inline SimdDInt32 gmx_simdcall
1341 operator*(SimdDInt32 a, SimdDInt32 b)
1343 SimdDInt32 res;
1345 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1347 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1349 return res;
1352 /*! \}
1354 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1355 * \{
1358 /*! \brief Equality comparison of two integers corresponding to double values.
1360 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1362 * \param a SIMD integer1
1363 * \param b SIMD integer2
1364 * \return SIMD integer boolean with true for elements where a==b
1366 static inline SimdDIBool gmx_simdcall
1367 operator==(SimdDInt32 a, SimdDInt32 b)
1369 SimdDIBool res;
1371 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1373 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1375 return res;
1378 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1380 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1382 * \param a SIMD integer1
1383 * \param b SIMD integer2
1384 * \return SIMD integer boolean with true for elements where a<b
1386 static inline SimdDIBool gmx_simdcall
1387 operator<(SimdDInt32 a, SimdDInt32 b)
1389 SimdDIBool res;
1391 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1393 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1395 return res;
1398 /*! \brief Check if any bit is set in each element
1400 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1402 * \param a SIMD integer
1403 * \return SIMD integer boolean with true for elements where any bit is set
1405 static inline SimdDIBool gmx_simdcall
1406 testBits(SimdDInt32 a)
1408 SimdDIBool res;
1410 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1412 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1414 return res;
1417 /*! \brief Logical AND on SimdDIBool.
1419 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1421 * \param a SIMD boolean 1
1422 * \param b SIMD boolean 2
1423 * \return True for elements where both a and b are true.
1425 static inline SimdDIBool gmx_simdcall
1426 operator&&(SimdDIBool a, SimdDIBool b)
1428 SimdDIBool res;
1430 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1432 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1434 return res;
1437 /*! \brief Logical OR on SimdDIBool.
1439 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1441 * \param a SIMD boolean 1
1442 * \param b SIMD boolean 2
1443 * \return True for elements where both a and b are true.
1445 static inline SimdDIBool gmx_simdcall
1446 operator||(SimdDIBool a, SimdDIBool b)
1448 SimdDIBool res;
1450 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1452 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1454 return res;
1457 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1459 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1461 * The actual return value for "any true" will depend on the architecture.
1462 * Any non-zero value should be considered truth.
1464 * \param a SIMD boolean
1465 * \return True if any of the elements in a is true, otherwise 0.
1467 static inline bool gmx_simdcall
1468 anyTrue(SimdDIBool a)
1470 bool res = false;
1472 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1474 res = res || a.simdInternal_[i];
1476 return res;
1479 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1481 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1483 * \param a SIMD integer to select from
1484 * \param mask Boolean selector
1485 * \return Elements from a where sel is true, 0 otherwise.
1487 static inline SimdDInt32 gmx_simdcall
1488 selectByMask(SimdDInt32 a, SimdDIBool mask)
1490 SimdDInt32 res;
1492 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1494 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1496 return res;
1499 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1501 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1503 * \param a SIMD integer to select from
1504 * \param mask Boolean selector
1505 * \return Elements from a where sel is false, 0 otherwise (sic).
1507 static inline SimdDInt32 gmx_simdcall
1508 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1510 SimdDInt32 res;
1512 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1514 res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1516 return res;
1519 /*! \brief Vector-blend SIMD integer selection.
1521 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1523 * \param a First source
1524 * \param b Second source
1525 * \param sel Boolean selector
1526 * \return For each element, select b if sel is true, a otherwise.
1528 static inline SimdDInt32 gmx_simdcall
1529 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1531 SimdDInt32 res;
1533 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1535 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1537 return res;
1540 /*! \}
1542 * \name SIMD implementation conversion operations
1543 * \{
1546 /*! \brief Round double precision floating point to integer.
1548 * \param a SIMD floating-point
1549 * \return SIMD integer, rounded to nearest integer.
1551 static inline SimdDInt32 gmx_simdcall
1552 cvtR2I(SimdDouble a)
1554 SimdDInt32 b;
1556 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1558 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1560 return b;
1563 /*! \brief Truncate double precision floating point to integer.
1565 * \param a SIMD floating-point
1566 * \return SIMD integer, truncated to nearest integer.
1568 static inline SimdDInt32 gmx_simdcall
1569 cvttR2I(SimdDouble a)
1571 SimdDInt32 b;
1573 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1575 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1577 return b;
1580 /*! \brief Convert integer to double precision floating point.
1582 * \param a SIMD integer
1583 * \return SIMD floating-point
1585 static inline SimdDouble gmx_simdcall
1586 cvtI2R(SimdDInt32 a)
1588 SimdDouble b;
1590 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1592 b.simdInternal_[i] = a.simdInternal_[i];
1594 return b;
1597 /*! \brief Convert from double precision boolean to corresponding integer boolean
1599 * \param a SIMD floating-point boolean
1600 * \return SIMD integer boolean
1602 static inline SimdDIBool gmx_simdcall
1603 cvtB2IB(SimdDBool a)
1605 SimdDIBool b;
1607 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1609 b.simdInternal_[i] = a.simdInternal_[i];
1611 return b;
1614 /*! \brief Convert from integer boolean to corresponding double precision boolean
1616 * \param a SIMD integer boolean
1617 * \return SIMD floating-point boolean
1619 static inline SimdDBool gmx_simdcall
1620 cvtIB2B(SimdDIBool a)
1622 SimdDBool b;
1624 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1626 b.simdInternal_[i] = a.simdInternal_[i];
1628 return b;
1631 /*! \brief Convert SIMD float to double.
1633 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1634 * \ref GMX_SIMD_DOUBLE_WIDTH.
1636 * Float/double conversions are complex since the SIMD width could either
1637 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1638 * need to check for the width in the code, and have different code paths.
1640 * \param f Single-precision SIMD variable
1641 * \return Double-precision SIMD variable of the same width
1643 static inline SimdDouble gmx_simdcall
1644 cvtF2D(SimdFloat gmx_unused f)
1646 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1647 SimdDouble d;
1648 for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1650 d.simdInternal_[i] = f.simdInternal_[i];
1652 return d;
1653 #else
1654 gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1655 #endif
1658 /*! \brief Convert SIMD double to float.
1660 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1661 * \ref GMX_SIMD_DOUBLE_WIDTH.
1663 * Float/double conversions are complex since the SIMD width could either
1664 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1665 * need to check for the width in the code, and have different code paths.
1667 * \param d Double-precision SIMD variable
1668 * \return Single-precision SIMD variable of the same width
1670 static inline SimdFloat gmx_simdcall
1671 cvtD2F(SimdDouble gmx_unused d)
1673 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1674 SimdFloat f;
1675 for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1677 f.simdInternal_[i] = d.simdInternal_[i];
1679 return f;
1680 #else
1681 gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1682 #endif
1685 /*! \brief Convert SIMD float to double.
1687 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1688 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1690 * Float/double conversions are complex since the SIMD width could either
1691 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1692 * need to check for the width in the code, and have different code paths.
1694 * \param f Single-precision SIMD variable
1695 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1696 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1698 static inline void gmx_simdcall
1699 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1701 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1702 for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1704 d0->simdInternal_[i] = f.simdInternal_[i];
1705 d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1707 #else
1708 gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1709 #endif
1712 /*! \brief Convert SIMD double to float.
1714 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1715 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1717 * Float/double conversions are complex since the SIMD width could either
1718 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1719 * need to check for the width in the code, and have different code paths.
1721 * \param d0 Double-precision SIMD variable, first half of values to put in f.
1722 * \param d1 Double-precision SIMD variable, second half of values to put in f.
1723 * \return Single-precision SIMD variable with all values.
1725 static inline SimdFloat gmx_simdcall
1726 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1728 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1729 SimdFloat f;
1730 for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1732 f.simdInternal_[i] = d0.simdInternal_[i];
1733 f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1735 return f;
1736 #else
1737 gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1738 #endif
1741 /*! \} */
1743 /*! \} */
1744 /*! \endcond */
1746 } // namespace gmx
1748 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H