2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "gromacs/math/utilities.h"
59 #include "gromacs/utility/fatalerror.h"
61 #include "impl_reference_definitions.h"
62 #include "impl_reference_simd_float.h"
68 /*! \addtogroup module_simd */
71 /*! \name SIMD implementation data types
75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
77 * \note This variable cannot be placed inside other structures or classes, since
78 * some compilers (including at least clang-3.7) appear to lose the
79 * alignment. This is likely particularly severe when allocating such
80 * memory on the heap, but it occurs for stack structures too.
87 //! \brief Construct from scalar
88 SimdDouble(double d
) { simdInternal_
.fill(d
); }
90 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
92 * This has to be public to enable usage in combination with static inline
93 * functions, but it should never, EVER, be accessed by any code outside
94 * the corresponding implementation directory since the type will depend
95 * on the architecture.
97 std::array
<double, GMX_SIMD_DOUBLE_WIDTH
> simdInternal_
;
100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
102 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
104 * \note The integer SIMD type will always be available, but on architectures
105 * that do not have any real integer SIMD support it might be defined as the
106 * floating-point type. This will work fine, since there are separate defines
107 * for whether the implementation can actually do any operations on integer
110 * \note This variable cannot be placed inside other structures or classes, since
111 * some compilers (including at least clang-3.7) appear to lose the
112 * alignment. This is likely particularly severe when allocating such
113 * memory on the heap, but it occurs for stack structures too.
120 //! \brief Construct from scalar
121 SimdDInt32(std::int32_t i
) { simdInternal_
.fill(i
); }
123 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
125 * This has to be public to enable usage in combination with static inline
126 * functions, but it should never, EVER, be accessed by any code outside
127 * the corresponding implementation directory since the type will depend
128 * on the architecture.
130 std::array
<std::int32_t, GMX_SIMD_DINT32_WIDTH
> simdInternal_
;
133 /*! \libinternal \brief Boolean type for double SIMD data.
135 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
137 * \note This variable cannot be placed inside other structures or classes, since
138 * some compilers (including at least clang-3.7) appear to lose the
139 * alignment. This is likely particularly severe when allocating such
140 * memory on the heap, but it occurs for stack structures too.
147 //! \brief Construct from scalar bool
148 SimdDBool(bool b
) { simdInternal_
.fill(b
); }
150 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
152 * This has to be public to enable usage in combination with static inline
153 * functions, but it should never, EVER, be accessed by any code outside
154 * the corresponding implementation directory since the type will depend
155 * on the architecture.
157 std::array
<bool, GMX_SIMD_DOUBLE_WIDTH
> simdInternal_
;
160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
162 * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
164 * \note This variable cannot be placed inside other structures or classes, since
165 * some compilers (including at least clang-3.7) appear to lose the
166 * alignment. This is likely particularly severe when allocating such
167 * memory on the heap, but it occurs for stack structures too.
174 //! \brief Construct from scalar
175 SimdDIBool(bool b
) { simdInternal_
.fill(b
); }
177 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
179 * This has to be public to enable usage in combination with static inline
180 * functions, but it should never, EVER, be accessed by any code outside
181 * the corresponding implementation directory since the type will depend
182 * on the architecture.
184 std::array
<bool, GMX_SIMD_DINT32_WIDTH
> simdInternal_
;
189 * \name SIMD implementation load/store operations for double precision floating point
193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
195 * \param m Pointer to memory aligned to the SIMD width.
196 * \return SIMD variable with data loaded.
198 static inline SimdDouble gmx_simdcall
simdLoad(const double* m
, SimdDoubleTag
= {})
202 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(double)) == 0);
204 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
210 * \param[out] m Pointer to memory, aligned to SIMD width.
211 * \param a SIMD variable to store
213 static inline void gmx_simdcall
store(double* m
, SimdDouble a
)
215 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(double)) == 0);
217 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
220 /*! \brief Load SIMD double from unaligned memory.
222 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
224 * \param m Pointer to memory, no alignment requirement.
225 * \return SIMD variable with data loaded.
227 static inline SimdDouble gmx_simdcall
simdLoadU(const double* m
, SimdDoubleTag
= {})
230 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
234 /*! \brief Store SIMD double to unaligned memory.
236 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
238 * \param[out] m Pointer to memory, no alignment requirement.
239 * \param a SIMD variable to store.
241 static inline void gmx_simdcall
storeU(double* m
, SimdDouble a
)
243 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
246 /*! \brief Set all SIMD double variable elements to 0.0.
248 * You should typically just call \ref gmx::setZero(), which uses proxy objects
249 * internally to handle all types rather than adding the suffix used here.
253 static inline SimdDouble gmx_simdcall
setZeroD()
255 return SimdDouble(0.0);
260 * \name SIMD implementation load/store operations for integers (corresponding to double)
264 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
266 * You should typically just call \ref gmx::load(), which uses proxy objects
267 * internally to handle all types rather than adding the suffix used here.
269 * \param m Pointer to memory, aligned to (double) integer SIMD width.
270 * \return SIMD integer variable.
272 static inline SimdDInt32 gmx_simdcall
simdLoad(const std::int32_t* m
, SimdDInt32Tag
)
276 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(std::int32_t)) == 0);
278 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
282 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
284 * \param m Memory aligned to (double) integer SIMD width.
285 * \param a SIMD (double) integer variable to store.
287 static inline void gmx_simdcall
store(std::int32_t* m
, SimdDInt32 a
)
289 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(std::int32_t)) == 0);
291 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
294 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
296 * You should typically just call \ref gmx::loadU(), which uses proxy objects
297 * internally to handle all types rather than adding the suffix used here.
299 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
301 * \param m Pointer to memory, no alignment requirements.
302 * \return SIMD integer variable.
304 static inline SimdDInt32 gmx_simdcall
simdLoadU(const std::int32_t* m
, SimdDInt32Tag
)
307 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
311 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
313 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
315 * \param m Memory pointer, no alignment requirements.
316 * \param a SIMD (double) integer variable to store.
318 static inline void gmx_simdcall
storeU(std::int32_t* m
, SimdDInt32 a
)
320 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
323 /*! \brief Set all SIMD (double) integer variable elements to 0.
325 * You should typically just call \ref gmx::setZero(), which uses proxy objects
326 * internally to handle all types rather than adding the suffix used here.
330 static inline SimdDInt32 gmx_simdcall
setZeroDI()
332 return SimdDInt32(0);
335 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
337 * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
339 * \tparam index Compile-time constant, position to extract (first position is 0)
340 * \param a SIMD variable from which to extract value.
341 * \return Single integer from position index in SIMD variable.
344 static inline std::int32_t gmx_simdcall
extract(SimdDInt32 a
)
346 return a
.simdInternal_
[index
];
351 * \name SIMD implementation double precision floating-point bitwise logical operations
355 /*! \brief Bitwise and for two SIMD double variables.
357 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
361 * \return data1 & data2
363 static inline SimdDouble gmx_simdcall
operator&(SimdDouble a
, SimdDouble b
)
372 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
374 conv1
.r
= a
.simdInternal_
[i
];
375 conv2
.r
= b
.simdInternal_
[i
];
376 conv1
.i
= conv1
.i
& conv2
.i
;
377 res
.simdInternal_
[i
] = conv1
.r
;
382 /*! \brief Bitwise andnot for SIMD double.
384 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
388 * \return (~data1) & data2
390 static inline SimdDouble gmx_simdcall
andNot(SimdDouble a
, SimdDouble b
)
399 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
401 conv1
.r
= a
.simdInternal_
[i
];
402 conv2
.r
= b
.simdInternal_
[i
];
403 conv1
.i
= ~conv1
.i
& conv2
.i
;
404 res
.simdInternal_
[i
] = conv1
.r
;
409 /*! \brief Bitwise or for SIMD double.
411 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
415 * \return data1 | data2
417 static inline SimdDouble gmx_simdcall
operator|(SimdDouble a
, SimdDouble b
)
426 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
428 conv1
.r
= a
.simdInternal_
[i
];
429 conv2
.r
= b
.simdInternal_
[i
];
430 conv1
.i
= conv1
.i
| conv2
.i
;
431 res
.simdInternal_
[i
] = conv1
.r
;
436 /*! \brief Bitwise xor for SIMD double.
438 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
442 * \return data1 ^ data2
444 static inline SimdDouble gmx_simdcall
operator^(SimdDouble a
, SimdDouble b
)
453 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
455 conv1
.r
= a
.simdInternal_
[i
];
456 conv2
.r
= b
.simdInternal_
[i
];
457 conv1
.i
= conv1
.i
^ conv2
.i
;
458 res
.simdInternal_
[i
] = conv1
.r
;
465 * \name SIMD implementation double precision floating-point arithmetics
469 /*! \brief Add two double SIMD variables.
475 static inline SimdDouble gmx_simdcall
operator+(SimdDouble a
, SimdDouble b
)
479 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
481 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + b
.simdInternal_
[i
];
486 /*! \brief Subtract two double SIMD variables.
492 static inline SimdDouble gmx_simdcall
operator-(SimdDouble a
, SimdDouble b
)
496 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
498 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] - b
.simdInternal_
[i
];
503 /*! \brief SIMD double precision negate.
505 * \param a SIMD double precision value
508 static inline SimdDouble gmx_simdcall
operator-(SimdDouble a
)
512 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
514 res
.simdInternal_
[i
] = -a
.simdInternal_
[i
];
519 /*! \brief Multiply two double SIMD variables.
525 static inline SimdDouble gmx_simdcall
operator*(SimdDouble a
, SimdDouble b
)
529 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
531 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] * b
.simdInternal_
[i
];
536 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
543 static inline SimdDouble gmx_simdcall
fma(SimdDouble a
, SimdDouble b
, SimdDouble c
)
548 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
555 static inline SimdDouble gmx_simdcall
fms(SimdDouble a
, SimdDouble b
, SimdDouble c
)
560 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
567 static inline SimdDouble gmx_simdcall
fnma(SimdDouble a
, SimdDouble b
, SimdDouble c
)
572 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
579 static inline SimdDouble gmx_simdcall
fnms(SimdDouble a
, SimdDouble b
, SimdDouble c
)
584 /*! \brief double SIMD 1.0/sqrt(x) lookup.
586 * This is a low-level instruction that should only be called from routines
587 * implementing the inverse square root in simd_math.h.
589 * \param x Argument, x>0
590 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
592 static inline SimdDouble gmx_simdcall
rsqrt(SimdDouble x
)
596 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
598 // sic - we only use single precision for the lookup
599 res
.simdInternal_
[i
] = 1.0F
/ std::sqrt(static_cast<float>(x
.simdInternal_
[i
]));
604 /*! \brief SIMD double 1.0/x lookup.
606 * This is a low-level instruction that should only be called from routines
607 * implementing the reciprocal in simd_math.h.
609 * \param x Argument, x!=0
610 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
612 static inline SimdDouble gmx_simdcall
rcp(SimdDouble x
)
616 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
618 // sic - we only use single precision for the lookup
619 res
.simdInternal_
[i
] = 1.0F
/ static_cast<float>(x
.simdInternal_
[i
]);
624 /*! \brief Add two double SIMD variables, masked version.
629 * \return a+b where mask is true, 0.0 otherwise.
631 static inline SimdDouble gmx_simdcall
maskAdd(SimdDouble a
, SimdDouble b
, SimdDBool m
)
635 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
637 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + (m
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : 0.0);
642 /*! \brief Multiply two double SIMD variables, masked version.
647 * \return a*b where mask is true, 0.0 otherwise.
649 static inline SimdDouble gmx_simdcall
maskzMul(SimdDouble a
, SimdDouble b
, SimdDBool m
)
653 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
655 res
.simdInternal_
[i
] = m
.simdInternal_
[i
] ? (a
.simdInternal_
[i
] * b
.simdInternal_
[i
]) : 0.0;
660 /*! \brief SIMD double fused multiply-add, masked version.
666 * \return a*b+c where mask is true, 0.0 otherwise.
668 static inline SimdDouble gmx_simdcall
maskzFma(SimdDouble a
, SimdDouble b
, SimdDouble c
, SimdDBool m
)
672 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
674 res
.simdInternal_
[i
] =
675 m
.simdInternal_
[i
] ? (a
.simdInternal_
[i
] * b
.simdInternal_
[i
] + c
.simdInternal_
[i
]) : 0.0;
680 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
682 * This is a low-level instruction that should only be called from routines
683 * implementing the inverse square root in simd_math.h.
685 * \param x Argument, x>0 for entries where mask is true.
687 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
688 * The result for masked-out entries will be 0.0.
690 static inline SimdDouble gmx_simdcall
maskzRsqrt(SimdDouble x
, SimdDBool m
)
694 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
696 // sic - we only use single precision for the lookup
697 res
.simdInternal_
[i
] = (m
.simdInternal_
[i
] != 0)
698 ? 1.0F
/ std::sqrt(static_cast<float>(x
.simdInternal_
[i
]))
704 /*! \brief SIMD double 1.0/x lookup, masked version.
706 * This is a low-level instruction that should only be called from routines
707 * implementing the reciprocal in simd_math.h.
709 * \param x Argument, x>0 for entries where mask is true.
711 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
712 * The result for masked-out entries will be 0.0.
714 static inline SimdDouble gmx_simdcall
maskzRcp(SimdDouble x
, SimdDBool m
)
718 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
720 res
.simdInternal_
[i
] =
721 (m
.simdInternal_
[i
] != 0) ? 1.0F
/ static_cast<float>(x
.simdInternal_
[i
]) : 0.0;
726 /*! \brief SIMD double floating-point fabs().
728 * \param a any floating point values
729 * \return fabs(a) for each element.
731 static inline SimdDouble gmx_simdcall
abs(SimdDouble a
)
735 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
737 res
.simdInternal_
[i
] = std::abs(a
.simdInternal_
[i
]);
742 /*! \brief Set each SIMD double element to the largest from two variables.
744 * \param a Any floating-point value
745 * \param b Any floating-point value
746 * \return max(a,b) for each element.
748 static inline SimdDouble gmx_simdcall
max(SimdDouble a
, SimdDouble b
)
752 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
754 res
.simdInternal_
[i
] = std::max(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
759 /*! \brief Set each SIMD double element to the smallest from two variables.
761 * \param a Any floating-point value
762 * \param b Any floating-point value
763 * \return min(a,b) for each element.
765 static inline SimdDouble gmx_simdcall
min(SimdDouble a
, SimdDouble b
)
769 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
771 res
.simdInternal_
[i
] = std::min(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
776 /*! \brief SIMD double round to nearest integer value (in floating-point format).
778 * \param a Any floating-point value
779 * \return The nearest integer, represented in floating-point format.
781 * \note Round mode is implementation defined. The only guarantee is that it
782 * is consistent between rounding functions (round, cvtR2I).
784 static inline SimdDouble gmx_simdcall
round(SimdDouble a
)
788 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
790 res
.simdInternal_
[i
] = std::round(a
.simdInternal_
[i
]);
795 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
797 * \param a Any floating-point value
798 * \return Integer rounded towards zero, represented in floating-point format.
800 * \note This is truncation towards zero, not floor(). The reason for this
801 * is that truncation is virtually always present as a dedicated hardware
802 * instruction, but floor() frequently isn't.
804 static inline SimdDouble gmx_simdcall
trunc(SimdDouble a
)
808 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
810 res
.simdInternal_
[i
] = std::trunc(a
.simdInternal_
[i
]);
815 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
817 * \tparam opt By default this function behaves like the standard
818 * library such that frexp(+-0,exp) returns +-0 and
819 * stores 0 in the exponent when value is 0. If you
820 * know the argument is always nonzero, you can set
821 * the template parameter to MathOptimization::Unsafe
822 * to make it slightly faster.
824 * \param value Floating-point value to extract from
825 * \param[out] exponent Returned exponent of value, integer SIMD format.
826 * \return Fraction of value, floating-point SIMD format.
828 template<MathOptimization opt
= MathOptimization::Safe
>
829 static inline SimdDouble gmx_simdcall
frexp(SimdDouble value
, SimdDInt32
* exponent
)
833 for (std::size_t i
= 0; i
< fraction
.simdInternal_
.size(); i
++)
835 fraction
.simdInternal_
[i
] = std::frexp(value
.simdInternal_
[i
], &exponent
->simdInternal_
[i
]);
840 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
842 * \tparam opt By default, this routine will return zero for input arguments
843 * that are so small they cannot be reproduced in the current
844 * precision. If the unsafe math optimization template parameter
845 * setting is used, these tests are skipped, and the result will
846 * be undefined (possible even NaN). This might happen below -127
847 * in single precision or -1023 in double, although some
848 * might use denormal support to extend the range.
850 * \param value Floating-point number to multiply with new exponent
851 * \param exponent Integer that will not overflow as 2^exponent.
852 * \return value*2^exponent
854 template<MathOptimization opt
= MathOptimization::Safe
>
855 static inline SimdDouble gmx_simdcall
ldexp(SimdDouble value
, SimdDInt32 exponent
)
859 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
861 // std::ldexp already takes care of clamping arguments, so we do not
862 // need to do anything in the reference implementation
863 res
.simdInternal_
[i
] = std::ldexp(value
.simdInternal_
[i
], exponent
.simdInternal_
[i
]);
868 /*! \brief Return sum of all elements in SIMD double variable.
870 * \param a SIMD variable to reduce/sum.
871 * \return The sum of all elements in the argument variable.
874 static inline double gmx_simdcall
reduce(SimdDouble a
)
878 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
880 sum
+= a
.simdInternal_
[i
];
887 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
891 /*! \brief SIMD a==b for double SIMD.
895 * \return Each element of the boolean will be set to true if a==b.
897 * Beware that exact floating-point comparisons are difficult.
899 static inline SimdDBool gmx_simdcall
operator==(SimdDouble a
, SimdDouble b
)
903 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
905 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] == b
.simdInternal_
[i
]);
910 /*! \brief SIMD a!=b for double SIMD.
914 * \return Each element of the boolean will be set to true if a!=b.
916 * Beware that exact floating-point comparisons are difficult.
918 static inline SimdDBool gmx_simdcall
operator!=(SimdDouble a
, SimdDouble b
)
922 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
924 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] != b
.simdInternal_
[i
]);
929 /*! \brief SIMD a<b for double SIMD.
933 * \return Each element of the boolean will be set to true if a<b.
935 static inline SimdDBool gmx_simdcall
operator<(SimdDouble a
, SimdDouble b
)
939 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
941 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] < b
.simdInternal_
[i
]);
946 /*! \brief SIMD a<=b for double SIMD.
950 * \return Each element of the boolean will be set to true if a<=b.
952 static inline SimdDBool gmx_simdcall
operator<=(SimdDouble a
, SimdDouble b
)
956 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
958 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] <= b
.simdInternal_
[i
]);
963 /*! \brief Return true if any bits are set in the single precision SIMD.
965 * This function is used to handle bitmasks, mainly for exclusions in the
966 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
967 * so it is not identical to not-equal.
970 * \return Each element of the boolean will be true if any bit in a is nonzero.
972 static inline SimdDBool gmx_simdcall
testBits(SimdDouble a
)
976 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
983 conv
.d
= a
.simdInternal_
[i
];
984 res
.simdInternal_
[i
] = (conv
.i
!= 0);
989 /*! \brief Logical \a and on double precision SIMD booleans.
991 * \param a logical vars 1
992 * \param b logical vars 2
993 * \return For each element, the result boolean is true if a \& b are true.
995 * \note This is not necessarily a bitwise operation - the storage format
996 * of booleans is implementation-dependent.
998 static inline SimdDBool gmx_simdcall
operator&&(SimdDBool a
, SimdDBool b
)
1002 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1004 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] && b
.simdInternal_
[i
]);
1009 /*! \brief Logical \a or on double precision SIMD booleans.
1011 * \param a logical vars 1
1012 * \param b logical vars 2
1013 * \return For each element, the result boolean is true if a or b is true.
1015 * Note that this is not necessarily a bitwise operation - the storage format
1016 * of booleans is implementation-dependent.
1019 static inline SimdDBool gmx_simdcall
operator||(SimdDBool a
, SimdDBool b
)
1023 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1025 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] || b
.simdInternal_
[i
]);
1030 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1032 * \param a Logical variable.
1033 * \return true if any element in a is true, otherwise false.
1035 * The actual return value for truth will depend on the architecture,
1036 * so any non-zero value is considered truth.
1038 static inline bool gmx_simdcall
anyTrue(SimdDBool a
)
1042 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
1044 res
= res
|| a
.simdInternal_
[i
];
1049 /*! \brief Select from double precision SIMD variable where boolean is true.
1051 * \param a Floating-point variable to select from
1052 * \param mask Boolean selector
1053 * \return For each element, a is selected for true, 0 for false.
1055 static inline SimdDouble gmx_simdcall
selectByMask(SimdDouble a
, SimdDBool mask
)
1059 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1061 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? a
.simdInternal_
[i
] : 0.0;
1066 /*! \brief Select from double precision SIMD variable where boolean is false.
1068 * \param a Floating-point variable to select from
1069 * \param mask Boolean selector
1070 * \return For each element, a is selected for false, 0 for true (sic).
1072 static inline SimdDouble gmx_simdcall
selectByNotMask(SimdDouble a
, SimdDBool mask
)
1076 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1078 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? 0.0 : a
.simdInternal_
[i
];
1083 /*! \brief Vector-blend SIMD double selection.
1085 * \param a First source
1086 * \param b Second source
1087 * \param sel Boolean selector
1088 * \return For each element, select b if sel is true, a otherwise.
1090 static inline SimdDouble gmx_simdcall
blend(SimdDouble a
, SimdDouble b
, SimdDBool sel
)
1094 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1096 res
.simdInternal_
[i
] = sel
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : a
.simdInternal_
[i
];
1103 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1107 /*! \brief Integer SIMD bitwise and.
1109 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1111 * \note You can \a not use this operation directly to select based on a boolean
1112 * SIMD variable, since booleans are separate from integer SIMD. If that
1113 * is what you need, have a look at \ref gmx::selectByMask instead.
1115 * \param a first integer SIMD
1116 * \param b second integer SIMD
1117 * \return a \& b (bitwise and)
1119 static inline SimdDInt32 gmx_simdcall
operator&(SimdDInt32 a
, SimdDInt32 b
)
1123 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1125 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] & b
.simdInternal_
[i
];
1130 /*! \brief Integer SIMD bitwise not/complement.
1132 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1134 * \note You can \a not use this operation directly to select based on a boolean
1135 * SIMD variable, since booleans are separate from integer SIMD. If that
1136 * is what you need, have a look at \ref gmx::selectByMask instead.
1138 * \param a integer SIMD
1139 * \param b integer SIMD
1142 static inline SimdDInt32 gmx_simdcall
andNot(SimdDInt32 a
, SimdDInt32 b
)
1146 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1148 res
.simdInternal_
[i
] = ~a
.simdInternal_
[i
] & b
.simdInternal_
[i
];
1153 /*! \brief Integer SIMD bitwise or.
1155 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1157 * \param a first integer SIMD
1158 * \param b second integer SIMD
1159 * \return a \| b (bitwise or)
1161 static inline SimdDInt32 gmx_simdcall
operator|(SimdDInt32 a
, SimdDInt32 b
)
1165 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1167 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] | b
.simdInternal_
[i
];
1172 /*! \brief Integer SIMD bitwise xor.
1174 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1176 * \param a first integer SIMD
1177 * \param b second integer SIMD
1178 * \return a ^ b (bitwise xor)
1180 static inline SimdDInt32 gmx_simdcall
operator^(SimdDInt32 a
, SimdDInt32 b
)
1184 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1186 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] ^ b
.simdInternal_
[i
];
1193 * \name SIMD implementation integer (corresponding to double) arithmetics
1197 /*! \brief Add SIMD integers.
1199 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1205 static inline SimdDInt32 gmx_simdcall
operator+(SimdDInt32 a
, SimdDInt32 b
)
1209 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1211 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + b
.simdInternal_
[i
];
1216 /*! \brief Subtract SIMD integers.
1218 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1224 static inline SimdDInt32 gmx_simdcall
operator-(SimdDInt32 a
, SimdDInt32 b
)
1228 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1230 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] - b
.simdInternal_
[i
];
1235 /*! \brief Multiply SIMD integers.
1237 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1243 * \note Only the low 32 bits are retained, so this can overflow.
1245 static inline SimdDInt32 gmx_simdcall
operator*(SimdDInt32 a
, SimdDInt32 b
)
1249 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1251 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] * b
.simdInternal_
[i
];
1258 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1262 /*! \brief Equality comparison of two integers corresponding to double values.
1264 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1266 * \param a SIMD integer1
1267 * \param b SIMD integer2
1268 * \return SIMD integer boolean with true for elements where a==b
1270 static inline SimdDIBool gmx_simdcall
operator==(SimdDInt32 a
, SimdDInt32 b
)
1274 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1276 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] == b
.simdInternal_
[i
]);
1281 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1283 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1285 * \param a SIMD integer1
1286 * \param b SIMD integer2
1287 * \return SIMD integer boolean with true for elements where a<b
1289 static inline SimdDIBool gmx_simdcall
operator<(SimdDInt32 a
, SimdDInt32 b
)
1293 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1295 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] < b
.simdInternal_
[i
]);
1300 /*! \brief Check if any bit is set in each element
1302 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1304 * \param a SIMD integer
1305 * \return SIMD integer boolean with true for elements where any bit is set
1307 static inline SimdDIBool gmx_simdcall
testBits(SimdDInt32 a
)
1311 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1313 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] != 0);
1318 /*! \brief Logical AND on SimdDIBool.
1320 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1322 * \param a SIMD boolean 1
1323 * \param b SIMD boolean 2
1324 * \return True for elements where both a and b are true.
1326 static inline SimdDIBool gmx_simdcall
operator&&(SimdDIBool a
, SimdDIBool b
)
1330 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1332 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] && b
.simdInternal_
[i
]);
1337 /*! \brief Logical OR on SimdDIBool.
1339 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1341 * \param a SIMD boolean 1
1342 * \param b SIMD boolean 2
1343 * \return True for elements where both a and b are true.
1345 static inline SimdDIBool gmx_simdcall
operator||(SimdDIBool a
, SimdDIBool b
)
1349 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1351 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] || b
.simdInternal_
[i
]);
1356 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1358 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1360 * The actual return value for "any true" will depend on the architecture.
1361 * Any non-zero value should be considered truth.
1363 * \param a SIMD boolean
1364 * \return True if any of the elements in a is true, otherwise 0.
1366 static inline bool gmx_simdcall
anyTrue(SimdDIBool a
)
1370 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
1372 res
= res
|| a
.simdInternal_
[i
];
1377 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1379 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1381 * \param a SIMD integer to select from
1382 * \param mask Boolean selector
1383 * \return Elements from a where sel is true, 0 otherwise.
1385 static inline SimdDInt32 gmx_simdcall
selectByMask(SimdDInt32 a
, SimdDIBool mask
)
1389 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1391 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? a
.simdInternal_
[i
] : 0;
1396 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1398 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1400 * \param a SIMD integer to select from
1401 * \param mask Boolean selector
1402 * \return Elements from a where sel is false, 0 otherwise (sic).
1404 static inline SimdDInt32 gmx_simdcall
selectByNotMask(SimdDInt32 a
, SimdDIBool mask
)
1408 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1410 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? 0 : a
.simdInternal_
[i
];
1415 /*! \brief Vector-blend SIMD integer selection.
1417 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1419 * \param a First source
1420 * \param b Second source
1421 * \param sel Boolean selector
1422 * \return For each element, select b if sel is true, a otherwise.
1424 static inline SimdDInt32 gmx_simdcall
blend(SimdDInt32 a
, SimdDInt32 b
, SimdDIBool sel
)
1428 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1430 res
.simdInternal_
[i
] = sel
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : a
.simdInternal_
[i
];
1437 * \name SIMD implementation conversion operations
1441 /*! \brief Round double precision floating point to integer.
1443 * \param a SIMD floating-point
1444 * \return SIMD integer, rounded to nearest integer.
1446 * \note Round mode is implementation defined. The only guarantee is that it
1447 * is consistent between rounding functions (round, cvtR2I).
1449 static inline SimdDInt32 gmx_simdcall
cvtR2I(SimdDouble a
)
1453 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1455 b
.simdInternal_
[i
] = std::round(a
.simdInternal_
[i
]);
1460 /*! \brief Truncate double precision floating point to integer.
1462 * \param a SIMD floating-point
1463 * \return SIMD integer, truncated to nearest integer.
1465 static inline SimdDInt32 gmx_simdcall
cvttR2I(SimdDouble a
)
1469 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1471 b
.simdInternal_
[i
] = std::trunc(a
.simdInternal_
[i
]);
1476 /*! \brief Convert integer to double precision floating point.
1478 * \param a SIMD integer
1479 * \return SIMD floating-point
1481 static inline SimdDouble gmx_simdcall
cvtI2R(SimdDInt32 a
)
1485 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1487 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1492 /*! \brief Convert from double precision boolean to corresponding integer boolean
1494 * \param a SIMD floating-point boolean
1495 * \return SIMD integer boolean
1497 static inline SimdDIBool gmx_simdcall
cvtB2IB(SimdDBool a
)
1501 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1503 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1508 /*! \brief Convert from integer boolean to corresponding double precision boolean
1510 * \param a SIMD integer boolean
1511 * \return SIMD floating-point boolean
1513 static inline SimdDBool gmx_simdcall
cvtIB2B(SimdDIBool a
)
1517 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1519 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1524 /*! \brief Convert SIMD float to double.
1526 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1527 * \ref GMX_SIMD_DOUBLE_WIDTH.
1529 * Float/double conversions are complex since the SIMD width could either
1530 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1531 * need to check for the width in the code, and have different code paths.
1533 * \param f Single-precision SIMD variable
1534 * \return Double-precision SIMD variable of the same width
1536 static inline SimdDouble gmx_simdcall
cvtF2D(SimdFloat gmx_unused f
)
1538 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1540 for (std::size_t i
= 0; i
< d
.simdInternal_
.size(); i
++)
1542 d
.simdInternal_
[i
] = f
.simdInternal_
[i
];
1546 gmx_fatal(FARGS
, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1550 /*! \brief Convert SIMD double to float.
1552 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1553 * \ref GMX_SIMD_DOUBLE_WIDTH.
1555 * Float/double conversions are complex since the SIMD width could either
1556 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1557 * need to check for the width in the code, and have different code paths.
1559 * \param d Double-precision SIMD variable
1560 * \return Single-precision SIMD variable of the same width
1562 static inline SimdFloat gmx_simdcall
cvtD2F(SimdDouble gmx_unused d
)
1564 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1566 for (std::size_t i
= 0; i
< f
.simdInternal_
.size(); i
++)
1568 f
.simdInternal_
[i
] = d
.simdInternal_
[i
];
1572 gmx_fatal(FARGS
, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1576 /*! \brief Convert SIMD float to double.
1578 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1579 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1581 * Float/double conversions are complex since the SIMD width could either
1582 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1583 * need to check for the width in the code, and have different code paths.
1585 * \param f Single-precision SIMD variable
1586 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1587 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1589 static inline void gmx_simdcall
cvtF2DD(SimdFloat gmx_unused f
,
1590 SimdDouble gmx_unused
* d0
,
1591 SimdDouble gmx_unused
* d1
)
1593 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1594 for (std::size_t i
= 0; i
< d0
->simdInternal_
.size(); i
++)
1596 d0
->simdInternal_
[i
] = f
.simdInternal_
[i
];
1597 d1
->simdInternal_
[i
] = f
.simdInternal_
[f
.simdInternal_
.size() / 2 + i
];
1600 gmx_fatal(FARGS
, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1604 /*! \brief Convert SIMD double to float.
1606 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1607 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1609 * Float/double conversions are complex since the SIMD width could either
1610 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1611 * need to check for the width in the code, and have different code paths.
1613 * \param d0 Double-precision SIMD variable, first half of values to put in f.
1614 * \param d1 Double-precision SIMD variable, second half of values to put in f.
1615 * \return Single-precision SIMD variable with all values.
1617 static inline SimdFloat gmx_simdcall
cvtDD2F(SimdDouble gmx_unused d0
, SimdDouble gmx_unused d1
)
1619 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1621 for (std::size_t i
= 0; i
< d0
.simdInternal_
.size(); i
++)
1623 f
.simdInternal_
[i
] = d0
.simdInternal_
[i
];
1624 f
.simdInternal_
[f
.simdInternal_
.size() / 2 + i
] = d1
.simdInternal_
[i
];
1628 gmx_fatal(FARGS
, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1639 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H