2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD single precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "gromacs/math/utilities.h"
60 #include "impl_reference_definitions.h"
66 /*! \addtogroup module_simd */
69 /*! \name SIMD implementation data types and built-in conversions between types
73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
75 * \note This variable cannot be placed inside other structures or classes, since
76 * some compilers (including at least clang-3.7) appear to lose the
77 * alignment. This is likely particularly severe when allocating such
78 * memory on the heap, but it occurs for stack structures too.
85 //! \brief Construct from scalar
86 SimdFloat(float f
) { simdInternal_
.fill(f
); }
88 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
90 * This has to be public to enable usage in combination with static inline
91 * functions, but it should never, EVER, be accessed by any code outside
92 * the corresponding implementation directory since the type will depend
93 * on the architecture.
95 std::array
<float, GMX_SIMD_FLOAT_WIDTH
> simdInternal_
;
98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
100 * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
102 * \note The integer SIMD type will always be available, but on architectures
103 * that do not have any real integer SIMD support it might be defined as the
104 * floating-point type. This will work fine, since there are separate defines
105 * for whether the implementation can actually do any operations on integer
107 * \note This variable cannot be placed inside other structures or classes, since
108 * some compilers (including at least clang-3.7) appear to lose the
109 * alignment. This is likely particularly severe when allocating such
110 * memory on the heap, but it occurs for stack structures too.
117 //! \brief Construct from scalar
118 SimdFInt32(std::int32_t i
) { simdInternal_
.fill(i
); }
120 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
122 * This has to be public to enable usage in combination with static inline
123 * functions, but it should never, EVER, be accessed by any code outside
124 * the corresponding implementation directory since the type will depend
125 * on the architecture.
127 std::array
<std::int32_t, GMX_SIMD_FINT32_WIDTH
> simdInternal_
;
130 /*! \libinternal \brief Boolean type for float SIMD data.
132 * Available if GMX_SIMD_HAVE_FLOAT is 1.
134 * \note This variable cannot be placed inside other structures or classes, since
135 * some compilers (including at least clang-3.7) appear to lose the
136 * alignment. This is likely particularly severe when allocating such
137 * memory on the heap, but it occurs for stack structures too.
144 //! \brief Construct from scalar
145 SimdFBool(bool b
) { simdInternal_
.fill(b
); }
147 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
149 * This has to be public to enable usage in combination with static inline
150 * functions, but it should never, EVER, be accessed by any code outside
151 * the corresponding implementation directory since the type will depend
152 * on the architecture.
154 std::array
<bool, GMX_SIMD_FLOAT_WIDTH
> simdInternal_
;
157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
159 * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
161 * \note This variable cannot be placed inside other structures or classes, since
162 * some compilers (including at least clang-3.7) appear to lose the
163 * alignment. This is likely particularly severe when allocating such
164 * memory on the heap, but it occurs for stack structures too.
171 //! \brief Construct from scalar
172 SimdFIBool(bool b
) { simdInternal_
.fill(b
); }
174 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
176 * This has to be public to enable usage in combination with static inline
177 * functions, but it should never, EVER, be accessed by any code outside
178 * the corresponding implementation directory since the type will depend
179 * on the architecture.
181 std::array
<bool, GMX_SIMD_FINT32_WIDTH
> simdInternal_
;
186 * \name SIMD implementation load/store operations for single precision floating point
190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
192 * \param m Pointer to memory aligned to the SIMD width.
193 * \return SIMD variable with data loaded.
195 static inline SimdFloat gmx_simdcall
simdLoad(const float* m
, SimdFloatTag
= {})
199 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(float)) == 0);
201 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
205 /*! \brief Store the contents of SIMD float variable to aligned memory m.
207 * \param[out] m Pointer to memory, aligned to SIMD width.
208 * \param a SIMD variable to store
210 static inline void gmx_simdcall
store(float* m
, SimdFloat a
)
212 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(float)) == 0);
214 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
217 /*! \brief Load SIMD float from unaligned memory.
219 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
221 * \param m Pointer to memory, no alignment requirement.
222 * \return SIMD variable with data loaded.
224 static inline SimdFloat gmx_simdcall
simdLoadU(const float* m
, SimdFloatTag
= {})
227 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
231 /*! \brief Store SIMD float to unaligned memory.
233 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
235 * \param[out] m Pointer to memory, no alignment requirement.
236 * \param a SIMD variable to store.
238 static inline void gmx_simdcall
storeU(float* m
, SimdFloat a
)
240 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
243 /*! \brief Set all SIMD float variable elements to 0.0.
245 * You should typically just call \ref gmx::setZero(), which uses proxy objects
246 * internally to handle all types rather than adding the suffix used here.
250 static inline SimdFloat gmx_simdcall
setZeroF()
252 return SimdFloat(0.0F
);
259 * \name SIMD implementation load/store operations for integers (corresponding to float)
263 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
265 * You should typically just call \ref gmx::load(), which uses proxy objects
266 * internally to handle all types rather than adding the suffix used here.
268 * \param m Pointer to memory, aligned to (float) integer SIMD width.
269 * \return SIMD integer variable.
271 static inline SimdFInt32 gmx_simdcall
simdLoad(const std::int32_t* m
, SimdFInt32Tag
)
275 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(std::int32_t)) == 0);
277 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
281 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
283 * \param m Memory aligned to (float) integer SIMD width.
284 * \param a SIMD variable to store.
286 static inline void gmx_simdcall
store(std::int32_t* m
, SimdFInt32 a
)
288 assert(std::size_t(m
) % (a
.simdInternal_
.size() * sizeof(std::int32_t)) == 0);
290 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
293 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
295 * You should typically just call \ref gmx::loadU(), which uses proxy objects
296 * internally to handle all types rather than adding the suffix used here.
298 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
300 * \param m Pointer to memory, no alignment requirements.
301 * \return SIMD integer variable.
303 static inline SimdFInt32 gmx_simdcall
simdLoadU(const std::int32_t* m
, SimdFInt32Tag
)
306 std::copy(m
, m
+ a
.simdInternal_
.size(), a
.simdInternal_
.begin());
310 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
312 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
314 * \param m Memory pointer, no alignment requirements.
315 * \param a SIMD variable to store.
317 static inline void gmx_simdcall
storeU(std::int32_t* m
, SimdFInt32 a
)
319 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
322 /*! \brief Set all SIMD (float) integer variable elements to 0.
324 * You should typically just call \ref gmx::setZero(), which uses proxy objects
325 * internally to handle all types rather than adding the suffix used here.
329 static inline SimdFInt32 gmx_simdcall
setZeroFI()
331 return SimdFInt32(0);
334 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
336 * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
338 * \tparam index Compile-time constant, position to extract (first position is 0)
339 * \param a SIMD variable from which to extract value.
340 * \return Single integer from position index in SIMD variable.
343 static inline std::int32_t gmx_simdcall
extract(SimdFInt32 a
)
345 return a
.simdInternal_
[index
];
350 * \name SIMD implementation single precision floating-point bitwise logical operations
354 /*! \brief Bitwise and for two SIMD float variables.
356 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
360 * \return data1 & data2
362 static inline SimdFloat gmx_simdcall
operator&(SimdFloat a
, SimdFloat b
)
371 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
373 conv1
.r
= a
.simdInternal_
[i
];
374 conv2
.r
= b
.simdInternal_
[i
];
375 conv1
.i
= conv1
.i
& conv2
.i
;
376 res
.simdInternal_
[i
] = conv1
.r
;
381 /*! \brief Bitwise andnot for SIMD float.
383 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
387 * \return (~data1) & data2
389 static inline SimdFloat gmx_simdcall
andNot(SimdFloat a
, SimdFloat b
)
398 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
400 conv1
.r
= a
.simdInternal_
[i
];
401 conv2
.r
= b
.simdInternal_
[i
];
402 conv1
.i
= ~conv1
.i
& conv2
.i
;
403 res
.simdInternal_
[i
] = conv1
.r
;
408 /*! \brief Bitwise or for SIMD float.
410 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
414 * \return data1 | data2
416 static inline SimdFloat gmx_simdcall
operator|(SimdFloat a
, SimdFloat b
)
425 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
427 conv1
.r
= a
.simdInternal_
[i
];
428 conv2
.r
= b
.simdInternal_
[i
];
429 conv1
.i
= conv1
.i
| conv2
.i
;
430 res
.simdInternal_
[i
] = conv1
.r
;
435 /*! \brief Bitwise xor for SIMD float.
437 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
441 * \return data1 ^ data2
443 static inline SimdFloat gmx_simdcall
operator^(SimdFloat a
, SimdFloat b
)
452 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
454 conv1
.r
= a
.simdInternal_
[i
];
455 conv2
.r
= b
.simdInternal_
[i
];
456 conv1
.i
= conv1
.i
^ conv2
.i
;
457 res
.simdInternal_
[i
] = conv1
.r
;
464 * \name SIMD implementation single precision floating-point arithmetics
468 /*! \brief Add two float SIMD variables.
474 static inline SimdFloat gmx_simdcall
operator+(SimdFloat a
, SimdFloat b
)
478 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
480 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + b
.simdInternal_
[i
];
485 /*! \brief Subtract two float SIMD variables.
491 static inline SimdFloat gmx_simdcall
operator-(SimdFloat a
, SimdFloat b
)
495 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
497 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] - b
.simdInternal_
[i
];
502 /*! \brief SIMD single precision negate.
504 * \param a SIMD double precision value
507 static inline SimdFloat gmx_simdcall
operator-(SimdFloat a
)
511 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
513 res
.simdInternal_
[i
] = -a
.simdInternal_
[i
];
518 /*! \brief Multiply two float SIMD variables.
524 static inline SimdFloat gmx_simdcall
operator*(SimdFloat a
, SimdFloat b
)
528 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
530 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] * b
.simdInternal_
[i
];
535 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
542 static inline SimdFloat gmx_simdcall
fma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
547 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
554 static inline SimdFloat gmx_simdcall
fms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
559 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
566 static inline SimdFloat gmx_simdcall
fnma(SimdFloat a
, SimdFloat b
, SimdFloat c
)
571 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
578 static inline SimdFloat gmx_simdcall
fnms(SimdFloat a
, SimdFloat b
, SimdFloat c
)
583 /*! \brief SIMD float 1.0/sqrt(x) lookup.
585 * This is a low-level instruction that should only be called from routines
586 * implementing the inverse square root in simd_math.h.
588 * \param x Argument, x>0
589 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
591 static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x
)
595 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
597 res
.simdInternal_
[i
] = 1.0F
/ std::sqrt(x
.simdInternal_
[i
]);
602 /*! \brief SIMD float 1.0/x lookup.
604 * This is a low-level instruction that should only be called from routines
605 * implementing the reciprocal in simd_math.h.
607 * \param x Argument, x!=0
608 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
610 static inline SimdFloat gmx_simdcall
rcp(SimdFloat x
)
614 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
616 res
.simdInternal_
[i
] = 1.0F
/ x
.simdInternal_
[i
];
621 /*! \brief Add two float SIMD variables, masked version.
626 * \return a+b where mask is true, a otherwise.
628 static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a
, SimdFloat b
, SimdFBool m
)
632 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
634 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + (m
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : 0.0F
);
639 /*! \brief Multiply two float SIMD variables, masked version.
644 * \return a*b where mask is true, 0.0 otherwise.
646 static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a
, SimdFloat b
, SimdFBool m
)
650 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
652 res
.simdInternal_
[i
] = m
.simdInternal_
[i
] ? (a
.simdInternal_
[i
] * b
.simdInternal_
[i
]) : 0.0F
;
657 /*! \brief SIMD float fused multiply-add, masked version.
663 * \return a*b+c where mask is true, 0.0 otherwise.
665 static inline SimdFloat gmx_simdcall
maskzFma(SimdFloat a
, SimdFloat b
, SimdFloat c
, SimdFBool m
)
669 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
671 res
.simdInternal_
[i
] =
672 m
.simdInternal_
[i
] ? (a
.simdInternal_
[i
] * b
.simdInternal_
[i
] + c
.simdInternal_
[i
]) : 0.0F
;
677 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
679 * This is a low-level instruction that should only be called from routines
680 * implementing the inverse square root in simd_math.h.
682 * \param x Argument, x>0 for entries where mask is true.
684 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
685 * The result for masked-out entries will be 0.0.
687 static inline SimdFloat gmx_simdcall
maskzRsqrt(SimdFloat x
, SimdFBool m
)
691 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
693 res
.simdInternal_
[i
] = (m
.simdInternal_
[i
] != 0) ? 1.0F
/ std::sqrt(x
.simdInternal_
[i
]) : 0.0F
;
698 /*! \brief SIMD float 1.0/x lookup, masked version.
700 * This is a low-level instruction that should only be called from routines
701 * implementing the reciprocal in simd_math.h.
703 * \param x Argument, x>0 for entries where mask is true.
705 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
706 * The result for masked-out entries will be 0.0.
708 static inline SimdFloat gmx_simdcall
maskzRcp(SimdFloat x
, SimdFBool m
)
712 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
714 res
.simdInternal_
[i
] = (m
.simdInternal_
[i
] != 0) ? 1.0F
/ x
.simdInternal_
[i
] : 0.0F
;
719 /*! \brief SIMD float Floating-point abs().
721 * \param a any floating point values
722 * \return abs(a) for each element.
724 static inline SimdFloat gmx_simdcall
abs(SimdFloat a
)
728 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
730 res
.simdInternal_
[i
] = std::abs(a
.simdInternal_
[i
]);
735 /*! \brief Set each SIMD float element to the largest from two variables.
737 * \param a Any floating-point value
738 * \param b Any floating-point value
739 * \return max(a,b) for each element.
741 static inline SimdFloat gmx_simdcall
max(SimdFloat a
, SimdFloat b
)
745 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
747 res
.simdInternal_
[i
] = std::max(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
752 /*! \brief Set each SIMD float element to the smallest from two variables.
754 * \param a Any floating-point value
755 * \param b Any floating-point value
756 * \return min(a,b) for each element.
758 static inline SimdFloat gmx_simdcall
min(SimdFloat a
, SimdFloat b
)
762 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
764 res
.simdInternal_
[i
] = std::min(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
769 /*! \brief SIMD float round to nearest integer value (in floating-point format).
771 * \param a Any floating-point value
772 * \return The nearest integer, represented in floating-point format.
774 * \note Round mode is implementation defined. The only guarantee is that it
775 * is consistent between rounding functions (round, cvtR2I).
777 static inline SimdFloat gmx_simdcall
round(SimdFloat a
)
781 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
783 res
.simdInternal_
[i
] = std::round(a
.simdInternal_
[i
]);
788 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
790 * \param a Any floating-point value
791 * \return Integer rounded towards zero, represented in floating-point format.
793 * \note This is truncation towards zero, not floor(). The reason for this
794 * is that truncation is virtually always present as a dedicated hardware
795 * instruction, but floor() frequently isn't.
797 static inline SimdFloat gmx_simdcall
trunc(SimdFloat a
)
801 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
803 res
.simdInternal_
[i
] = std::trunc(a
.simdInternal_
[i
]);
808 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
810 * \tparam opt By default this function behaves like the standard
811 * library such that frexp(+-0,exp) returns +-0 and
812 * stores 0 in the exponent when value is 0. If you
813 * know the argument is always nonzero, you can set
814 * the template parameter to MathOptimization::Unsafe
815 * to make it slightly faster.
817 * \param value Floating-point value to extract from
818 * \param[out] exponent Returned exponent of value, integer SIMD format.
819 * \return Fraction of value, floating-point SIMD format.
821 template<MathOptimization opt
= MathOptimization::Safe
>
822 static inline SimdFloat gmx_simdcall
frexp(SimdFloat value
, SimdFInt32
* exponent
)
826 for (std::size_t i
= 0; i
< fraction
.simdInternal_
.size(); i
++)
828 fraction
.simdInternal_
[i
] = std::frexp(value
.simdInternal_
[i
], &exponent
->simdInternal_
[i
]);
833 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
835 * \tparam opt By default, this routine will return zero for input arguments
836 * that are so small they cannot be reproduced in the current
837 * precision. If the unsafe math optimization template parameter
838 * setting is used, these tests are skipped, and the result will
839 * be undefined (possible even NaN). This might happen below -127
840 * in single precision or -1023 in double, although some
841 * might use denormal support to extend the range.
843 * \param value Floating-point number to multiply with new exponent
844 * \param exponent Integer that will not overflow as 2^exponent.
845 * \return value*2^exponent
847 template<MathOptimization opt
= MathOptimization::Safe
>
848 static inline SimdFloat gmx_simdcall
ldexp(SimdFloat value
, SimdFInt32 exponent
)
852 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
854 // std::ldexp already takes care of clamping arguments, so we do not
855 // need to do anything in the reference implementation
856 res
.simdInternal_
[i
] = std::ldexp(value
.simdInternal_
[i
], exponent
.simdInternal_
[i
]);
861 /*! \brief Return sum of all elements in SIMD float variable.
863 * \param a SIMD variable to reduce/sum.
864 * \return The sum of all elements in the argument variable.
867 static inline float gmx_simdcall
reduce(SimdFloat a
)
871 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
873 sum
+= a
.simdInternal_
[i
];
880 * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
884 /*! \brief SIMD a==b for single SIMD.
888 * \return Each element of the boolean will be set to true if a==b.
890 * Beware that exact floating-point comparisons are difficult.
892 static inline SimdFBool gmx_simdcall
operator==(SimdFloat a
, SimdFloat b
)
896 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
898 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] == b
.simdInternal_
[i
]);
903 /*! \brief SIMD a!=b for single SIMD.
907 * \return Each element of the boolean will be set to true if a!=b.
909 * Beware that exact floating-point comparisons are difficult.
911 static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a
, SimdFloat b
)
915 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
917 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] != b
.simdInternal_
[i
]);
922 /*! \brief SIMD a<b for single SIMD.
926 * \return Each element of the boolean will be set to true if a<b.
928 static inline SimdFBool gmx_simdcall
operator<(SimdFloat a
, SimdFloat b
)
932 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
934 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] < b
.simdInternal_
[i
]);
939 /*! \brief SIMD a<=b for single SIMD.
943 * \return Each element of the boolean will be set to true if a<=b.
945 static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a
, SimdFloat b
)
949 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
951 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] <= b
.simdInternal_
[i
]);
956 /*! \brief Return true if any bits are set in the single precision SIMD.
958 * This function is used to handle bitmasks, mainly for exclusions in the
959 * inner kernels. Note that it will return true even for -0.0F (sign bit set),
960 * so it is not identical to not-equal.
963 * \return Each element of the boolean will be true if any bit in a is nonzero.
965 static inline SimdFBool gmx_simdcall
testBits(SimdFloat a
)
969 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
976 conv
.f
= a
.simdInternal_
[i
];
977 res
.simdInternal_
[i
] = (conv
.i
!= 0);
982 /*! \brief Logical \a and on single precision SIMD booleans.
984 * \param a logical vars 1
985 * \param b logical vars 2
986 * \return For each element, the result boolean is true if a \& b are true.
988 * \note This is not necessarily a bitwise operation - the storage format
989 * of booleans is implementation-dependent.
991 static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a
, SimdFBool b
)
995 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
997 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] && b
.simdInternal_
[i
]);
1002 /*! \brief Logical \a or on single precision SIMD booleans.
1004 * \param a logical vars 1
1005 * \param b logical vars 2
1006 * \return For each element, the result boolean is true if a or b is true.
1008 * Note that this is not necessarily a bitwise operation - the storage format
1009 * of booleans is implementation-dependent.
1012 static inline SimdFBool gmx_simdcall
operator||(SimdFBool a
, SimdFBool b
)
1016 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1018 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] || b
.simdInternal_
[i
]);
1023 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1025 * \param a Logical variable.
1026 * \return true if any element in a is true, otherwise false.
1028 * The actual return value for truth will depend on the architecture,
1029 * so any non-zero value is considered truth.
1031 static inline bool gmx_simdcall
anyTrue(SimdFBool a
)
1035 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
1037 res
= res
|| a
.simdInternal_
[i
];
1042 /*! \brief Select from single precision SIMD variable where boolean is true.
1044 * \param a Floating-point variable to select from
1045 * \param mask Boolean selector
1046 * \return For each element, a is selected for true, 0 for false.
1048 static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a
, SimdFBool mask
)
1052 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1054 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? a
.simdInternal_
[i
] : 0.0F
;
1059 /*! \brief Select from single precision SIMD variable where boolean is false.
1061 * \param a Floating-point variable to select from
1062 * \param mask Boolean selector
1063 * \return For each element, a is selected for false, 0 for true (sic).
1065 static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a
, SimdFBool mask
)
1069 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1071 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? 0.0F
: a
.simdInternal_
[i
];
1076 /*! \brief Vector-blend SIMD float selection.
1078 * \param a First source
1079 * \param b Second source
1080 * \param sel Boolean selector
1081 * \return For each element, select b if sel is true, a otherwise.
1083 static inline SimdFloat gmx_simdcall
blend(SimdFloat a
, SimdFloat b
, SimdFBool sel
)
1087 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1089 res
.simdInternal_
[i
] = sel
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : a
.simdInternal_
[i
];
1096 * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1100 /*! \brief Integer SIMD bitwise and.
1102 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1104 * \note You can \a not use this operation directly to select based on a boolean
1105 * SIMD variable, since booleans are separate from integer SIMD. If that
1106 * is what you need, have a look at \ref gmx::selectByMask instead.
1108 * \param a first integer SIMD
1109 * \param b second integer SIMD
1110 * \return a \& b (bitwise and)
1112 static inline SimdFInt32 gmx_simdcall
operator&(SimdFInt32 a
, SimdFInt32 b
)
1116 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1118 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] & b
.simdInternal_
[i
];
1123 /*! \brief Integer SIMD bitwise not/complement.
1125 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1127 * \note You can \a not use this operation directly to select based on a boolean
1128 * SIMD variable, since booleans are separate from integer SIMD. If that
1129 * is what you need, have a look at \ref gmx::selectByMask instead.
1131 * \param a integer SIMD
1132 * \param b integer SIMD
1135 static inline SimdFInt32 gmx_simdcall
andNot(SimdFInt32 a
, SimdFInt32 b
)
1139 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1141 res
.simdInternal_
[i
] = ~a
.simdInternal_
[i
] & b
.simdInternal_
[i
];
1146 /*! \brief Integer SIMD bitwise or.
1148 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1150 * \param a first integer SIMD
1151 * \param b second integer SIMD
1152 * \return a \| b (bitwise or)
1154 static inline SimdFInt32 gmx_simdcall
operator|(SimdFInt32 a
, SimdFInt32 b
)
1158 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1160 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] | b
.simdInternal_
[i
];
1165 /*! \brief Integer SIMD bitwise xor.
1167 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1169 * \param a first integer SIMD
1170 * \param b second integer SIMD
1171 * \return a ^ b (bitwise xor)
1173 static inline SimdFInt32 gmx_simdcall
operator^(SimdFInt32 a
, SimdFInt32 b
)
1177 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1179 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] ^ b
.simdInternal_
[i
];
1186 * \name SIMD implementation integer (corresponding to float) arithmetics
1190 /*! \brief Add SIMD integers.
1192 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1193 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1199 static inline SimdFInt32 gmx_simdcall
operator+(SimdFInt32 a
, SimdFInt32 b
)
1203 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1205 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + b
.simdInternal_
[i
];
1210 /*! \brief Subtract SIMD integers.
1212 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1213 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1219 static inline SimdFInt32 gmx_simdcall
operator-(SimdFInt32 a
, SimdFInt32 b
)
1223 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1225 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] - b
.simdInternal_
[i
];
1230 /*! \brief Multiply SIMD integers.
1232 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1233 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1239 * \note Only the low 32 bits are retained, so this can overflow.
1241 static inline SimdFInt32 gmx_simdcall
operator*(SimdFInt32 a
, SimdFInt32 b
)
1245 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1247 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] * b
.simdInternal_
[i
];
1254 * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1258 /*! \brief Equality comparison of two integers corresponding to float values.
1260 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1262 * \param a SIMD integer1
1263 * \param b SIMD integer2
1264 * \return SIMD integer boolean with true for elements where a==b
1266 static inline SimdFIBool gmx_simdcall
operator==(SimdFInt32 a
, SimdFInt32 b
)
1270 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1272 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] == b
.simdInternal_
[i
]);
1277 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1279 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1281 * \param a SIMD integer1
1282 * \param b SIMD integer2
1283 * \return SIMD integer boolean with true for elements where a<b
1285 static inline SimdFIBool gmx_simdcall
operator<(SimdFInt32 a
, SimdFInt32 b
)
1289 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1291 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] < b
.simdInternal_
[i
]);
1296 /*! \brief Check if any bit is set in each element
1298 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1300 * \param a SIMD integer
1301 * \return SIMD integer boolean with true for elements where any bit is set
1303 static inline SimdFIBool gmx_simdcall
testBits(SimdFInt32 a
)
1307 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1309 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] != 0);
1314 /*! \brief Logical AND on SimdFIBool.
1316 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1318 * \param a SIMD boolean 1
1319 * \param b SIMD boolean 2
1320 * \return True for elements where both a and b are true.
1322 static inline SimdFIBool gmx_simdcall
operator&&(SimdFIBool a
, SimdFIBool b
)
1326 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1328 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] && b
.simdInternal_
[i
]);
1333 /*! \brief Logical OR on SimdFIBool.
1335 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1337 * \param a SIMD boolean 1
1338 * \param b SIMD boolean 2
1339 * \return True for elements where both a and b are true.
1341 static inline SimdFIBool gmx_simdcall
operator||(SimdFIBool a
, SimdFIBool b
)
1345 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1347 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] || b
.simdInternal_
[i
]);
1352 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1354 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1356 * The actual return value for "any true" will depend on the architecture.
1357 * Any non-zero value should be considered truth.
1359 * \param a SIMD boolean
1360 * \return True if any of the elements in a is true, otherwise 0.
1362 static inline bool gmx_simdcall
anyTrue(SimdFIBool a
)
1366 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
1368 res
= res
|| a
.simdInternal_
[i
];
1373 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1375 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1377 * \param a SIMD integer to select from
1378 * \param mask Boolean selector
1379 * \return Elements from a where sel is true, 0 otherwise.
1381 static inline SimdFInt32 gmx_simdcall
selectByMask(SimdFInt32 a
, SimdFIBool mask
)
1385 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1387 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? a
.simdInternal_
[i
] : 0.0F
;
1392 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1394 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1396 * \param a SIMD integer to select from
1397 * \param mask Boolean selector
1398 * \return Elements from a where sel is false, 0 otherwise (sic).
1400 static inline SimdFInt32 gmx_simdcall
selectByNotMask(SimdFInt32 a
, SimdFIBool mask
)
1404 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1406 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? 0.0F
: a
.simdInternal_
[i
];
1411 /*! \brief Vector-blend SIMD integer selection.
1413 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1415 * \param a First source
1416 * \param b Second source
1417 * \param sel Boolean selector
1418 * \return For each element, select b if sel is true, a otherwise.
1420 static inline SimdFInt32 gmx_simdcall
blend(SimdFInt32 a
, SimdFInt32 b
, SimdFIBool sel
)
1424 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
1426 res
.simdInternal_
[i
] = sel
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : a
.simdInternal_
[i
];
1433 * \name SIMD implementation conversion operations
1437 /*! \brief Round single precision floating point to integer.
1439 * \param a SIMD floating-point
1440 * \return SIMD integer, rounded to nearest integer.
1442 * \note Round mode is implementation defined. The only guarantee is that it
1443 * is consistent between rounding functions (round, cvtR2I).
1445 static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a
)
1449 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1451 b
.simdInternal_
[i
] = std::round(a
.simdInternal_
[i
]);
1456 /*! \brief Truncate single precision floating point to integer.
1458 * \param a SIMD floating-point
1459 * \return SIMD integer, truncated to nearest integer.
1461 static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a
)
1465 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1467 b
.simdInternal_
[i
] = std::trunc(a
.simdInternal_
[i
]);
1472 /*! \brief Convert integer to single precision floating point.
1474 * \param a SIMD integer
1475 * \return SIMD floating-point
1477 static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a
)
1481 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1483 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1488 /*! \brief Convert from single precision boolean to corresponding integer boolean
1490 * \param a SIMD floating-point boolean
1491 * \return SIMD integer boolean
1493 static inline SimdFIBool gmx_simdcall
cvtB2IB(SimdFBool a
)
1497 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1499 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1504 /*! \brief Convert from integer boolean to corresponding single precision boolean
1506 * \param a SIMD integer boolean
1507 * \return SIMD floating-point boolean
1509 static inline SimdFBool gmx_simdcall
cvtIB2B(SimdFIBool a
)
1513 for (std::size_t i
= 0; i
< b
.simdInternal_
.size(); i
++)
1515 b
.simdInternal_
[i
] = a
.simdInternal_
[i
];
1527 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H