Added scalar functions cbrt as well as masked Rcp
[gromacs.git] / src / gromacs / simd / scalar / scalar_math.h
bloba5239859843c8f0b256084865520395479e88b99
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_MATH_H
36 #define GMX_SIMD_SCALAR_MATH_H
38 #include <cmath>
40 #include "gromacs/math/functions.h"
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/simd/scalar/scalar.h"
44 /*! \libinternal \file
46 * \brief Scalar math functions mimicking GROMACS SIMD math functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
52 * It is important that these functions match the SIMD versions exactly in their
53 * arguments and template arguments so that overload resolution works correctly.
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
57 * \inlibraryapi
58 * \ingroup module_simd
61 namespace gmx
64 /*****************************************************************************
65 * Single-precision floating point math functions mimicking SIMD versions *
66 *****************************************************************************/
69 /*! \brief Composes single value with the magnitude of x and the sign of y.
71 * \param x Value to set sign for
72 * \param y Value used to set sign
73 * \return Magnitude of x, sign of y
75 * \note This function might be superficially meaningless, but it helps us to
76 * write templated SIMD/non-SIMD code. For clarity it should not be used
77 * outside such code.
79 static inline float copysign(float x, float y)
81 return std::copysign(x, y);
84 // invsqrt(x) is already defined in math/functions.h
86 /*! \brief Calculate 1/sqrt(x) for two floats.
88 * \param x0 First argument, x0 must be positive - no argument checking.
89 * \param x1 Second argument, x1 must be positive - no argument checking.
90 * \param[out] out0 Result 1/sqrt(x0)
91 * \param[out] out1 Result 1/sqrt(x1)
93 * \note This function might be superficially meaningless, but it helps us to
94 * write templated SIMD/non-SIMD code. For clarity it should not be used
95 * outside such code.
97 static inline void invsqrtPair(float x0, float x1, float* out0, float* out1)
99 *out0 = invsqrt(x0);
100 *out1 = invsqrt(x1);
103 /*! \brief Calculate 1/x for float.
105 * \param x Argument that must be nonzero. This routine does not check arguments.
106 * \return 1/x. Result is undefined if your argument was invalid.
108 * \note This function might be superficially meaningless, but it helps us to
109 * write templated SIMD/non-SIMD code. For clarity it should not be used
110 * outside such code.
112 static inline float inv(float x)
114 return 1.0F / x;
117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
119 * This routine only evaluates 1/sqrt(x) if mask is true.
120 * Illegal values for a masked-out float will not lead to
121 * floating-point exceptions.
123 * \param x Argument that must be >0 if masked-in.
124 * \param m Mask
125 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
126 * entry was not masked, and 0.0 for masked-out entries.
128 * \note This function might be superficially meaningless, but it helps us to
129 * write templated SIMD/non-SIMD code. For clarity it should not be used
130 * outside such code.
132 static inline float maskzInvsqrt(float x, bool m)
134 return m ? invsqrt(x) : 0.0F;
137 /*! \brief Calculate 1/x for masked entry of float.
139 * This routine only evaluates 1/x if mask is true.
140 * Illegal values for a masked-out float will not lead to
141 * floating-point exceptions.
143 * \param x Argument that must be nonzero if masked-in.
144 * \param m Mask
145 * \return 1/x. Result is undefined if your argument was invalid or
146 * entry was not masked, and 0.0 for masked-out entries.
148 * \note This function might be superficially meaningless, but it helps us to
149 * write templated SIMD/non-SIMD code. For clarity it should not be used
150 * outside such code.
152 static inline float maskzInv(float x, bool m)
154 return m ? inv(x) : 0.0F;
157 /*! \brief Float sqrt(x). This is the square root.
159 * \param x Argument, should be >= 0.
160 * \result The square root of x. Undefined if argument is invalid.
162 * \note This function might be superficially meaningless, but it helps us to
163 * write templated SIMD/non-SIMD code. For clarity it should not be used
164 * outside such code.
166 template<MathOptimization opt = MathOptimization::Safe>
167 static inline float sqrt(float x)
169 return std::sqrt(x);
172 /*! \brief Float cbrt(x). This is the cubic root.
174 * \param x Argument, should be >= 0.
175 * \result The cubic root of x. Undefined if argument is invalid.
177 * \note This function might be superficially meaningless, but it helps us to
178 * write templated SIMD/non-SIMD code. For clarity it should not be used
179 * outside such code.
181 template<MathOptimization opt = MathOptimization::Safe>
182 static inline float cbrt(float x)
184 return std::cbrt(x);
187 /*! \brief Float log(x). This is the natural logarithm.
189 * \param x Argument, should be >0.
190 * \result The natural logarithm of x. Undefined if argument is invalid.
192 * \note This function might be superficially meaningless, but it helps us to
193 * write templated SIMD/non-SIMD code. For clarity it should not be used
194 * outside such code.
196 static inline float log(float x)
198 return std::log(x);
201 /*! \brief Float 2^x.
203 * \param x Argument.
204 * \result 2^x. Undefined if input argument caused overflow.
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
208 * outside such code.
210 template<MathOptimization opt = MathOptimization::Safe>
211 static inline float exp2(float x)
213 return std::exp2(x);
216 /*! \brief Float exp(x).
218 * \param x Argument.
219 * \result exp(x). Undefined if input argument caused overflow.
221 * \note This function might be superficially meaningless, but it helps us to
222 * write templated SIMD/non-SIMD code. For clarity it should not be used
223 * outside such code.
225 template<MathOptimization opt = MathOptimization::Safe>
226 static inline float exp(float x)
228 return std::exp(x);
231 /*! \brief Float erf(x).
233 * \param x Argument.
234 * \result erf(x)
236 * \note This function might be superficially meaningless, but it helps us to
237 * write templated SIMD/non-SIMD code. For clarity it should not be used
238 * outside such code.
240 static inline float erf(float x)
242 return std::erf(x);
245 /*! \brief Float erfc(x).
247 * \param x Argument.
248 * \result erfc(x)
250 * \note This function might be superficially meaningless, but it helps us to
251 * write templated SIMD/non-SIMD code. For clarity it should not be used
252 * outside such code.
254 static inline float erfc(float x)
256 return std::erfc(x);
260 /*! \brief Float sin \& cos.
262 * \param x The argument to evaluate sin/cos for
263 * \param[out] sinval Sin(x)
264 * \param[out] cosval Cos(x)
266 * \note This function might be superficially meaningless, but it helps us to
267 * write templated SIMD/non-SIMD code. For clarity it should not be used
268 * outside such code.
270 static inline void sincos(float x, float* sinval, float* cosval)
272 *sinval = std::sin(x);
273 *cosval = std::cos(x);
276 /*! \brief Float sin.
278 * \param x The argument to evaluate sin for
279 * \result Sin(x)
281 * \note This function might be superficially meaningless, but it helps us to
282 * write templated SIMD/non-SIMD code. For clarity it should not be used
283 * outside such code.
285 static inline float sin(float x)
287 return std::sin(x);
290 /*! \brief Float cos.
292 * \param x The argument to evaluate cos for
293 * \result Cos(x)
295 * \note This function might be superficially meaningless, but it helps us to
296 * write templated SIMD/non-SIMD code. For clarity it should not be used
297 * outside such code.
299 static inline float cos(float x)
301 return std::cos(x);
304 /*! \brief Float tan.
306 * \param x The argument to evaluate tan for
307 * \result Tan(x)
309 * \note This function might be superficially meaningless, but it helps us to
310 * write templated SIMD/non-SIMD code. For clarity it should not be used
311 * outside such code.
313 static inline float tan(float x)
315 return std::tan(x);
318 /*! \brief float asin.
320 * \param x The argument to evaluate asin for
321 * \result Asin(x)
323 * \note This function might be superficially meaningless, but it helps us to
324 * write templated SIMD/non-SIMD code. For clarity it should not be used
325 * outside such code.
327 static inline float asin(float x)
329 return std::asin(x);
332 /*! \brief Float acos.
334 * \param x The argument to evaluate acos for
335 * \result Acos(x)
337 * \note This function might be superficially meaningless, but it helps us to
338 * write templated SIMD/non-SIMD code. For clarity it should not be used
339 * outside such code.
341 static inline float acos(float x)
343 return std::acos(x);
346 /*! \brief Float atan.
348 * \param x The argument to evaluate atan for
349 * \result Atan(x)
351 * \note This function might be superficially meaningless, but it helps us to
352 * write templated SIMD/non-SIMD code. For clarity it should not be used
353 * outside such code.
355 static inline float atan(float x)
357 return std::atan(x);
360 /*! \brief Float atan2(y,x).
362 * \param y Y component of vector, any quartile
363 * \param x X component of vector, any quartile
364 * \result Atan(y,x)
366 * \note This function might be superficially meaningless, but it helps us to
367 * write templated SIMD/non-SIMD code. For clarity it should not be used
368 * outside such code.
370 static inline float atan2(float y, float x)
372 return std::atan2(y, x);
375 /*! \brief Calculate the force correction due to PME analytically in float.
377 * See the SIMD version of this function for details.
379 * \param z2 input parameter
380 * \returns Correction to use on force
382 * \note This function might be superficially meaningless, but it helps us to
383 * write templated SIMD/non-SIMD code. For clarity it should not be used
384 * outside such code.
386 static inline float pmeForceCorrection(float z2)
388 const float FN6(-1.7357322914161492954e-8F);
389 const float FN5(1.4703624142580877519e-6F);
390 const float FN4(-0.000053401640219807709149F);
391 const float FN3(0.0010054721316683106153F);
392 const float FN2(-0.019278317264888380590F);
393 const float FN1(0.069670166153766424023F);
394 const float FN0(-0.75225204789749321333F);
396 const float FD4(0.0011193462567257629232F);
397 const float FD3(0.014866955030185295499F);
398 const float FD2(0.11583842382862377919F);
399 const float FD1(0.50736591960530292870F);
400 const float FD0(1.0F);
402 float z4;
403 float polyFN0, polyFN1, polyFD0, polyFD1;
405 z4 = z2 * z2;
407 polyFD0 = fma(FD4, z4, FD2);
408 polyFD1 = fma(FD3, z4, FD1);
409 polyFD0 = fma(polyFD0, z4, FD0);
410 polyFD0 = fma(polyFD1, z2, polyFD0);
412 polyFN0 = fma(FN6, z4, FN4);
413 polyFN1 = fma(FN5, z4, FN3);
414 polyFN0 = fma(polyFN0, z4, FN2);
415 polyFN1 = fma(polyFN1, z4, FN1);
416 polyFN0 = fma(polyFN0, z4, FN0);
417 polyFN0 = fma(polyFN1, z2, polyFN0);
419 return polyFN0 / polyFD0;
422 /*! \brief Calculate the potential correction due to PME analytically in float.
424 * See the SIMD version of this function for details.
426 * \param z2 input parameter
427 * \returns Correction to use on potential.
429 * \note This function might be superficially meaningless, but it helps us to
430 * write templated SIMD/non-SIMD code. For clarity it should not be used
431 * outside such code.
433 static inline float pmePotentialCorrection(float z2)
435 const float VN6(1.9296833005951166339e-8F);
436 const float VN5(-1.4213390571557850962e-6F);
437 const float VN4(0.000041603292906656984871F);
438 const float VN3(-0.00013134036773265025626F);
439 const float VN2(0.038657983986041781264F);
440 const float VN1(0.11285044772717598220F);
441 const float VN0(1.1283802385263030286F);
443 const float VD3(0.0066752224023576045451F);
444 const float VD2(0.078647795836373922256F);
445 const float VD1(0.43336185284710920150F);
446 const float VD0(1.0F);
448 float z4;
449 float polyVN0, polyVN1, polyVD0, polyVD1;
451 z4 = z2 * z2;
453 polyVD1 = fma(VD3, z4, VD1);
454 polyVD0 = fma(VD2, z4, VD0);
455 polyVD0 = fma(polyVD1, z2, polyVD0);
457 polyVN0 = fma(VN6, z4, VN4);
458 polyVN1 = fma(VN5, z4, VN3);
459 polyVN0 = fma(polyVN0, z4, VN2);
460 polyVN1 = fma(polyVN1, z4, VN1);
461 polyVN0 = fma(polyVN0, z4, VN0);
462 polyVN0 = fma(polyVN1, z2, polyVN0);
464 return polyVN0 / polyVD0;
467 /*****************************************************************************
468 * Double-precision floating point math functions mimicking SIMD versions *
469 *****************************************************************************/
472 /*! \brief Composes double value with the magnitude of x and the sign of y.
474 * \param x Value to set sign for
475 * \param y Value used to set sign
476 * \return Magnitude of x, sign of y
478 * \note This function might be superficially meaningless, but it helps us to
479 * write templated SIMD/non-SIMD code. For clarity it should not be used
480 * outside such code.
482 static inline double copysign(double x, double y)
484 return std::copysign(x, y);
487 // invsqrt(x) is already defined in math/functions.h
489 /*! \brief Calculate 1/sqrt(x) for two doubles.
491 * \param x0 First argument, x0 must be positive - no argument checking.
492 * \param x1 Second argument, x1 must be positive - no argument checking.
493 * \param[out] out0 Result 1/sqrt(x0)
494 * \param[out] out1 Result 1/sqrt(x1)
496 * \note This function might be superficially meaningless, but it helps us to
497 * write templated SIMD/non-SIMD code. For clarity it should not be used
498 * outside such code.
500 static inline void invsqrtPair(double x0, double x1, double* out0, double* out1)
502 *out0 = invsqrt(x0);
503 *out1 = invsqrt(x1);
506 /*! \brief Calculate 1/x for double.
508 * \param x Argument that must be nonzero. This routine does not check arguments.
509 * \return 1/x. Result is undefined if your argument was invalid.
511 * \note This function might be superficially meaningless, but it helps us to
512 * write templated SIMD/non-SIMD code. For clarity it should not be used
513 * outside such code.
515 static inline double inv(double x)
517 return 1.0 / x;
520 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
522 * This routine only evaluates 1/sqrt(x) if mask is true.
523 * Illegal values for a masked-out double will not lead to
524 * floating-point exceptions.
526 * \param x Argument that must be >0 if masked-in.
527 * \param m Mask
528 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
529 * entry was not masked, and 0.0 for masked-out entries.
531 * \note This function might be superficially meaningless, but it helps us to
532 * write templated SIMD/non-SIMD code. For clarity it should not be used
533 * outside such code.
535 static inline double maskzInvsqrt(double x, bool m)
537 return m ? invsqrt(x) : 0.0;
540 /*! \brief Calculate 1/x for masked entry of double.
542 * This routine only evaluates 1/x if mask is true.
543 * Illegal values for a masked-out double will not lead to
544 * floating-point exceptions.
546 * \param x Argument that must be nonzero if masked-in.
547 * \param m Mask
548 * \return 1/x. Result is undefined if your argument was invalid or
549 * entry was not masked, and 0.0 for masked-out entries.
551 * \note This function might be superficially meaningless, but it helps us to
552 * write templated SIMD/non-SIMD code. For clarity it should not be used
553 * outside such code.
555 static inline double maskzInv(double x, bool m)
557 return m ? inv(x) : 0.0;
560 /*! \brief Double sqrt(x). This is the square root.
562 * \param x Argument, should be >= 0.
563 * \result The square root of x. Undefined if argument is invalid.
565 * \note This function might be superficially meaningless, but it helps us to
566 * write templated SIMD/non-SIMD code. For clarity it should not be used
567 * outside such code.
569 template<MathOptimization opt = MathOptimization::Safe>
570 static inline double sqrt(double x)
572 return std::sqrt(x);
575 /*! \brief Double cbrt(x). This is the cubic root.
577 * \param x Argument, should be >= 0.
578 * \result The cubic root of x. Undefined if argument is invalid.
580 * \note This function might be superficially meaningless, but it helps us to
581 * write templated SIMD/non-SIMD code. For clarity it should not be used
582 * outside such code.
584 template<MathOptimization opt = MathOptimization::Safe>
585 static inline double cbrt(double x)
587 return std::cbrt(x);
590 /*! \brief Double log(x). This is the natural logarithm.
592 * \param x Argument, should be >0.
593 * \result The natural logarithm of x. Undefined if argument is invalid.
595 * \note This function might be superficially meaningless, but it helps us to
596 * write templated SIMD/non-SIMD code. For clarity it should not be used
597 * outside such code.
599 static inline double log(double x)
601 return std::log(x);
604 /*! \brief Double 2^x.
606 * \param x Argument.
607 * \result 2^x. Undefined if input argument caused overflow.
609 * \note This function might be superficially meaningless, but it helps us to
610 * write templated SIMD/non-SIMD code. For clarity it should not be used
611 * outside such code.
613 template<MathOptimization opt = MathOptimization::Safe>
614 static inline double exp2(double x)
616 return std::exp2(x);
619 /*! \brief Double exp(x).
621 * \param x Argument.
622 * \result exp(x). Undefined if input argument caused overflow.
624 * \note This function might be superficially meaningless, but it helps us to
625 * write templated SIMD/non-SIMD code. For clarity it should not be used
626 * outside such code.
628 template<MathOptimization opt = MathOptimization::Safe>
629 static inline double exp(double x)
631 return std::exp(x);
634 /*! \brief Double erf(x).
636 * \param x Argument.
637 * \result erf(x)
639 * \note This function might be superficially meaningless, but it helps us to
640 * write templated SIMD/non-SIMD code. For clarity it should not be used
641 * outside such code.
643 static inline double erf(double x)
645 return std::erf(x);
648 /*! \brief Double erfc(x).
650 * \param x Argument.
651 * \result erfc(x)
653 * \note This function might be superficially meaningless, but it helps us to
654 * write templated SIMD/non-SIMD code. For clarity it should not be used
655 * outside such code.
657 static inline double erfc(double x)
659 return std::erfc(x);
663 /*! \brief Double sin \& cos.
665 * \param x The argument to evaluate sin/cos for
666 * \param[out] sinval Sin(x)
667 * \param[out] cosval Cos(x)
669 * \note This function might be superficially meaningless, but it helps us to
670 * write templated SIMD/non-SIMD code. For clarity it should not be used
671 * outside such code.
673 static inline void sincos(double x, double* sinval, double* cosval)
675 *sinval = std::sin(x);
676 *cosval = std::cos(x);
679 /*! \brief Double sin.
681 * \param x The argument to evaluate sin for
682 * \result Sin(x)
684 * \note This function might be superficially meaningless, but it helps us to
685 * write templated SIMD/non-SIMD code. For clarity it should not be used
686 * outside such code.
688 static inline double sin(double x)
690 return std::sin(x);
693 /*! \brief Double cos.
695 * \param x The argument to evaluate cos for
696 * \result Cos(x)
698 * \note This function might be superficially meaningless, but it helps us to
699 * write templated SIMD/non-SIMD code. For clarity it should not be used
700 * outside such code.
702 static inline double cos(double x)
704 return std::cos(x);
707 /*! \brief Double tan.
709 * \param x The argument to evaluate tan for
710 * \result Tan(x)
712 * \note This function might be superficially meaningless, but it helps us to
713 * write templated SIMD/non-SIMD code. For clarity it should not be used
714 * outside such code.
716 static inline double tan(double x)
718 return std::tan(x);
721 /*! \brief Double asin.
723 * \param x The argument to evaluate asin for
724 * \result Asin(x)
726 * \note This function might be superficially meaningless, but it helps us to
727 * write templated SIMD/non-SIMD code. For clarity it should not be used
728 * outside such code.
730 static inline double asin(double x)
732 return std::asin(x);
735 /*! \brief Double acos.
737 * \param x The argument to evaluate acos for
738 * \result Acos(x)
740 * \note This function might be superficially meaningless, but it helps us to
741 * write templated SIMD/non-SIMD code. For clarity it should not be used
742 * outside such code.
744 static inline double acos(double x)
746 return std::acos(x);
749 /*! \brief Double atan.
751 * \param x The argument to evaluate atan for
752 * \result Atan(x)
754 * \note This function might be superficially meaningless, but it helps us to
755 * write templated SIMD/non-SIMD code. For clarity it should not be used
756 * outside such code.
758 static inline double atan(double x)
760 return std::atan(x);
763 /*! \brief Double atan2(y,x).
765 * \param y Y component of vector, any quartile
766 * \param x X component of vector, any quartile
767 * \result Atan(y,x)
769 * \note This function might be superficially meaningless, but it helps us to
770 * write templated SIMD/non-SIMD code. For clarity it should not be used
771 * outside such code.
773 static inline double atan2(double y, double x)
775 return std::atan2(y, x);
778 /*! \brief Calculate the force correction due to PME analytically in double.
780 * See the SIMD version of this function for details.
782 * \param z2 input parameter
783 * \returns Correction to use on force
785 * \note This function might be superficially meaningless, but it helps us to
786 * write templated SIMD/non-SIMD code. For clarity it should not be used
787 * outside such code.
789 static inline double pmeForceCorrection(double z2)
791 const double FN10(-8.0072854618360083154e-14);
792 const double FN9(1.1859116242260148027e-11);
793 const double FN8(-8.1490406329798423616e-10);
794 const double FN7(3.4404793543907847655e-8);
795 const double FN6(-9.9471420832602741006e-7);
796 const double FN5(0.000020740315999115847456);
797 const double FN4(-0.00031991745139313364005);
798 const double FN3(0.0035074449373659008203);
799 const double FN2(-0.031750380176100813405);
800 const double FN1(0.13884101728898463426);
801 const double FN0(-0.75225277815249618847);
803 const double FD5(0.000016009278224355026701);
804 const double FD4(0.00051055686934806966046);
805 const double FD3(0.0081803507497974289008);
806 const double FD2(0.077181146026670287235);
807 const double FD1(0.41543303143712535988);
808 const double FD0(1.0);
810 double z4;
811 double polyFN0, polyFN1, polyFD0, polyFD1;
813 z4 = z2 * z2;
815 polyFD1 = fma(FD5, z4, FD3);
816 polyFD1 = fma(polyFD1, z4, FD1);
817 polyFD1 = polyFD1 * z2;
818 polyFD0 = fma(FD4, z4, FD2);
819 polyFD0 = fma(polyFD0, z4, FD0);
820 polyFD0 = polyFD0 + polyFD1;
822 polyFD0 = inv(polyFD0);
824 polyFN0 = fma(FN10, z4, FN8);
825 polyFN0 = fma(polyFN0, z4, FN6);
826 polyFN0 = fma(polyFN0, z4, FN4);
827 polyFN0 = fma(polyFN0, z4, FN2);
828 polyFN0 = fma(polyFN0, z4, FN0);
829 polyFN1 = fma(FN9, z4, FN7);
830 polyFN1 = fma(polyFN1, z4, FN5);
831 polyFN1 = fma(polyFN1, z4, FN3);
832 polyFN1 = fma(polyFN1, z4, FN1);
833 polyFN0 = fma(polyFN1, z2, polyFN0);
835 return polyFN0 * polyFD0;
838 /*! \brief Calculate the potential correction due to PME analytically in double.
840 * See the SIMD version of this function for details.
842 * \param z2 input parameter
843 * \returns Correction to use on potential.
845 * \note This function might be superficially meaningless, but it helps us to
846 * write templated SIMD/non-SIMD code. For clarity it should not be used
847 * outside such code.
849 static inline double pmePotentialCorrection(double z2)
851 const double VN9(-9.3723776169321855475e-13);
852 const double VN8(1.2280156762674215741e-10);
853 const double VN7(-7.3562157912251309487e-9);
854 const double VN6(2.6215886208032517509e-7);
855 const double VN5(-4.9532491651265819499e-6);
856 const double VN4(0.00025907400778966060389);
857 const double VN3(0.0010585044856156469792);
858 const double VN2(0.045247661136833092885);
859 const double VN1(0.11643931522926034421);
860 const double VN0(1.1283791671726767970);
862 const double VD5(0.000021784709867336150342);
863 const double VD4(0.00064293662010911388448);
864 const double VD3(0.0096311444822588683504);
865 const double VD2(0.085608012351550627051);
866 const double VD1(0.43652499166614811084);
867 const double VD0(1.0);
869 double z4;
870 double polyVN0, polyVN1, polyVD0, polyVD1;
872 z4 = z2 * z2;
874 polyVD1 = fma(VD5, z4, VD3);
875 polyVD0 = fma(VD4, z4, VD2);
876 polyVD1 = fma(polyVD1, z4, VD1);
877 polyVD0 = fma(polyVD0, z4, VD0);
878 polyVD0 = fma(polyVD1, z2, polyVD0);
880 polyVD0 = inv(polyVD0);
882 polyVN1 = fma(VN9, z4, VN7);
883 polyVN0 = fma(VN8, z4, VN6);
884 polyVN1 = fma(polyVN1, z4, VN5);
885 polyVN0 = fma(polyVN0, z4, VN4);
886 polyVN1 = fma(polyVN1, z4, VN3);
887 polyVN0 = fma(polyVN0, z4, VN2);
888 polyVN1 = fma(polyVN1, z4, VN1);
889 polyVN0 = fma(polyVN0, z4, VN0);
890 polyVN0 = fma(polyVN1, z2, polyVN0);
892 return polyVN0 * polyVD0;
896 /*****************************************************************************
897 * Floating point math functions mimicking SIMD versions. *
898 * Double precision data, single precision accuracy. *
899 *****************************************************************************/
902 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
904 * \param x Argument that must be >0. This routine does not check arguments.
905 * \return 1/sqrt(x). Result is undefined if your argument was invalid.
907 * \note This function might be superficially meaningless, but it helps us to
908 * write templated SIMD/non-SIMD code. For clarity it should not be used
909 * outside such code.
911 static inline double invsqrtSingleAccuracy(double x)
913 return invsqrt(static_cast<float>(x));
916 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
918 * \param x0 First argument, x0 must be positive - no argument checking.
919 * \param x1 Second argument, x1 must be positive - no argument checking.
920 * \param[out] out0 Result 1/sqrt(x0)
921 * \param[out] out1 Result 1/sqrt(x1)
923 * \note This function might be superficially meaningless, but it helps us to
924 * write templated SIMD/non-SIMD code. For clarity it should not be used
925 * outside such code.
927 static inline void invsqrtPairSingleAccuracy(double x0, double x1, double* out0, double* out1)
929 *out0 = invsqrt(static_cast<float>(x0));
930 *out1 = invsqrt(static_cast<float>(x1));
933 /*! \brief Calculate 1/x for double, but with single accuracy.
935 * \param x Argument that must be nonzero. This routine does not check arguments.
936 * \return 1/x. Result is undefined if your argument was invalid.
938 * \note This function might be superficially meaningless, but it helps us to
939 * write templated SIMD/non-SIMD code. For clarity it should not be used
940 * outside such code.
942 static inline double invSingleAccuracy(double x)
944 return 1.0F / x;
947 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
949 * This routine only evaluates 1/sqrt(x) if mask is true.
950 * Illegal values for a masked-out double will not lead to
951 * floating-point exceptions.
953 * \param x Argument that must be >0 if masked-in.
954 * \param m Mask
955 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
956 * entry was not masked, and 0.0 for masked-out entries.
958 * \note This function might be superficially meaningless, but it helps us to
959 * write templated SIMD/non-SIMD code. For clarity it should not be used
960 * outside such code.
962 static inline double maskzInvsqrtSingleAccuracy(double x, bool m)
964 return m ? invsqrtSingleAccuracy(x) : 0.0;
967 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
969 * This routine only evaluates 1/x if mask is true.
970 * Illegal values for a masked-out double will not lead to
971 * floating-point exceptions.
973 * \param x Argument that must be nonzero if masked-in.
974 * \param m Mask
975 * \return 1/x. Result is undefined if your argument was invalid or
976 * entry was not masked, and 0.0 for masked-out entries.
978 * \note This function might be superficially meaningless, but it helps us to
979 * write templated SIMD/non-SIMD code. For clarity it should not be used
980 * outside such code.
982 static inline double maskzInvSingleAccuracy(double x, bool m)
984 return m ? invSingleAccuracy(x) : 0.0;
987 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
989 * \param x Argument that must be >=0.
990 * \return sqrt(x).
992 * \note This function might be superficially meaningless, but it helps us to
993 * write templated SIMD/non-SIMD code. For clarity it should not be used
994 * outside such code.
996 static inline double sqrtSingleAccuracy(double x)
998 return std::sqrt(static_cast<float>(x));
1001 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
1003 * \param x Argument, should be >0.
1004 * \result The natural logarithm of x. Undefined if argument is invalid.
1006 * \note This function might be superficially meaningless, but it helps us to
1007 * write templated SIMD/non-SIMD code. For clarity it should not be used
1008 * outside such code.
1010 static inline double logSingleAccuracy(double x)
1012 return std::log(static_cast<float>(x));
1015 /*! \brief Double 2^x, but with single accuracy.
1017 * \param x Argument.
1018 * \result 2^x. Undefined if input argument caused overflow.
1020 * \note This function might be superficially meaningless, but it helps us to
1021 * write templated SIMD/non-SIMD code. For clarity it should not be used
1022 * outside such code.
1024 static inline double exp2SingleAccuracy(double x)
1026 return std::exp2(static_cast<float>(x));
1029 /*! \brief Double exp(x), but with single accuracy.
1031 * \param x Argument.
1032 * \result exp(x). Undefined if input argument caused overflow.
1034 * \note This function might be superficially meaningless, but it helps us to
1035 * write templated SIMD/non-SIMD code. For clarity it should not be used
1036 * outside such code.
1038 static inline double expSingleAccuracy(double x)
1040 return std::exp(static_cast<float>(x));
1043 /*! \brief Double erf(x), but with single accuracy.
1045 * \param x Argument.
1046 * \result erf(x)
1048 * \note This function might be superficially meaningless, but it helps us to
1049 * write templated SIMD/non-SIMD code. For clarity it should not be used
1050 * outside such code.
1052 static inline double erfSingleAccuracy(double x)
1054 return std::erf(static_cast<float>(x));
1057 /*! \brief Double erfc(x), but with single accuracy.
1059 * \param x Argument.
1060 * \result erfc(x)
1062 * \note This function might be superficially meaningless, but it helps us to
1063 * write templated SIMD/non-SIMD code. For clarity it should not be used
1064 * outside such code.
1066 static inline double erfcSingleAccuracy(double x)
1068 return std::erfc(static_cast<float>(x));
1072 /*! \brief Double sin \& cos, but with single accuracy.
1074 * \param x The argument to evaluate sin/cos for
1075 * \param[out] sinval Sin(x)
1076 * \param[out] cosval Cos(x)
1078 * \note This function might be superficially meaningless, but it helps us to
1079 * write templated SIMD/non-SIMD code. For clarity it should not be used
1080 * outside such code.
1082 static inline void sincosSingleAccuracy(double x, double* sinval, double* cosval)
1084 // There is no single-precision sincos guaranteed in C++11, so use
1085 // separate functions and hope the compiler optimizes it for us.
1086 *sinval = std::sin(static_cast<float>(x));
1087 *cosval = std::cos(static_cast<float>(x));
1090 /*! \brief Double sin, but with single accuracy.
1092 * \param x The argument to evaluate sin for
1093 * \result Sin(x)
1095 * \note This function might be superficially meaningless, but it helps us to
1096 * write templated SIMD/non-SIMD code. For clarity it should not be used
1097 * outside such code.
1099 static inline double sinSingleAccuracy(double x)
1101 return std::sin(static_cast<float>(x));
1104 /*! \brief Double cos, but with single accuracy.
1106 * \param x The argument to evaluate cos for
1107 * \result Cos(x)
1109 * \note This function might be superficially meaningless, but it helps us to
1110 * write templated SIMD/non-SIMD code. For clarity it should not be used
1111 * outside such code.
1113 static inline double cosSingleAccuracy(double x)
1115 return std::cos(static_cast<float>(x));
1118 /*! \brief Double tan, but with single accuracy.
1120 * \param x The argument to evaluate tan for
1121 * \result Tan(x)
1123 * \note This function might be superficially meaningless, but it helps us to
1124 * write templated SIMD/non-SIMD code. For clarity it should not be used
1125 * outside such code.
1127 static inline double tanSingleAccuracy(double x)
1129 return std::tan(static_cast<float>(x));
1132 /*! \brief Double asin, but with single accuracy.
1134 * \param x The argument to evaluate asin for
1135 * \result Asin(x)
1137 * \note This function might be superficially meaningless, but it helps us to
1138 * write templated SIMD/non-SIMD code. For clarity it should not be used
1139 * outside such code.
1141 static inline double asinSingleAccuracy(double x)
1143 return std::asin(static_cast<float>(x));
1146 /*! \brief Double acos, but with single accuracy.
1148 * \param x The argument to evaluate acos for
1149 * \result Acos(x)
1151 * \note This function might be superficially meaningless, but it helps us to
1152 * write templated SIMD/non-SIMD code. For clarity it should not be used
1153 * outside such code.
1155 static inline double acosSingleAccuracy(double x)
1157 return std::acos(static_cast<float>(x));
1160 /*! \brief Double atan, but with single accuracy.
1162 * \param x The argument to evaluate atan for
1163 * \result Atan(x)
1165 * \note This function might be superficially meaningless, but it helps us to
1166 * write templated SIMD/non-SIMD code. For clarity it should not be used
1167 * outside such code.
1169 static inline double atanSingleAccuracy(double x)
1171 return std::atan(static_cast<float>(x));
1174 /*! \brief Double atan2(y,x), but with single accuracy.
1176 * \param y Y component of vector, any quartile
1177 * \param x X component of vector, any quartile
1178 * \result Atan(y,x)
1180 * \note This function might be superficially meaningless, but it helps us to
1181 * write templated SIMD/non-SIMD code. For clarity it should not be used
1182 * outside such code.
1184 static inline double atan2SingleAccuracy(double y, double x)
1186 return std::atan2(static_cast<float>(y), static_cast<float>(x));
1189 /*! \brief Force correction due to PME in double, but with single accuracy.
1191 * See the SIMD version of this function for details.
1193 * \param z2 input parameter
1194 * \returns Correction to use on force
1196 * \note This function might be superficially meaningless, but it helps us to
1197 * write templated SIMD/non-SIMD code. For clarity it should not be used
1198 * outside such code.
1200 static inline double pmeForceCorrectionSingleAccuracy(double z2)
1202 const float FN6(-1.7357322914161492954e-8F);
1203 const float FN5(1.4703624142580877519e-6F);
1204 const float FN4(-0.000053401640219807709149F);
1205 const float FN3(0.0010054721316683106153F);
1206 const float FN2(-0.019278317264888380590F);
1207 const float FN1(0.069670166153766424023F);
1208 const float FN0(-0.75225204789749321333F);
1210 const float FD4(0.0011193462567257629232F);
1211 const float FD3(0.014866955030185295499F);
1212 const float FD2(0.11583842382862377919F);
1213 const float FD1(0.50736591960530292870F);
1214 const float FD0(1.0F);
1216 float z4;
1217 float polyFN0, polyFN1, polyFD0, polyFD1;
1219 float z2f = z2;
1221 z4 = z2f * z2f;
1223 polyFD0 = fma(FD4, z4, FD2);
1224 polyFD1 = fma(FD3, z4, FD1);
1225 polyFD0 = fma(polyFD0, z4, FD0);
1226 polyFD0 = fma(polyFD1, z2f, polyFD0);
1228 polyFN0 = fma(FN6, z4, FN4);
1229 polyFN1 = fma(FN5, z4, FN3);
1230 polyFN0 = fma(polyFN0, z4, FN2);
1231 polyFN1 = fma(polyFN1, z4, FN1);
1232 polyFN0 = fma(polyFN0, z4, FN0);
1233 polyFN0 = fma(polyFN1, z2f, polyFN0);
1235 return polyFN0 / polyFD0;
1238 /*! \brief Potential correction due to PME in double, but with single accuracy.
1240 * See the SIMD version of this function for details.
1242 * \param z2 input parameter
1243 * \returns Correction to use on potential.
1245 * \note This function might be superficially meaningless, but it helps us to
1246 * write templated SIMD/non-SIMD code. For clarity it should not be used
1247 * outside such code.
1249 static inline double pmePotentialCorrectionSingleAccuracy(double z2)
1251 const float VN6(1.9296833005951166339e-8F);
1252 const float VN5(-1.4213390571557850962e-6F);
1253 const float VN4(0.000041603292906656984871F);
1254 const float VN3(-0.00013134036773265025626F);
1255 const float VN2(0.038657983986041781264F);
1256 const float VN1(0.11285044772717598220F);
1257 const float VN0(1.1283802385263030286F);
1259 const float VD3(0.0066752224023576045451F);
1260 const float VD2(0.078647795836373922256F);
1261 const float VD1(0.43336185284710920150F);
1262 const float VD0(1.0F);
1264 float z4;
1265 float polyVN0, polyVN1, polyVD0, polyVD1;
1267 float z2f = z2;
1269 z4 = z2f * z2f;
1271 polyVD1 = fma(VD3, z4, VD1);
1272 polyVD0 = fma(VD2, z4, VD0);
1273 polyVD0 = fma(polyVD1, z2f, polyVD0);
1275 polyVN0 = fma(VN6, z4, VN4);
1276 polyVN1 = fma(VN5, z4, VN3);
1277 polyVN0 = fma(polyVN0, z4, VN2);
1278 polyVN1 = fma(polyVN1, z4, VN1);
1279 polyVN0 = fma(polyVN0, z4, VN0);
1280 polyVN0 = fma(polyVN1, z2f, polyVN0);
1282 return polyVN0 / polyVD0;
1286 } // namespace gmx
1289 #endif // GMX_SIMD_SCALAR_MATH_H