src/gromacs/simd/impl_reference/impl_reference_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cmath>
  52 #include <cstddef>
  53 #include <cstdint>
  54
  55 #include <algorithm>
  56 #include <array>
  57
  58 #include "gromacs/math/utilities.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61 #include "impl_reference_definitions.h"
  62 #include "impl_reference_simd_float.h"
  63
  64 namespace gmx
  65 {
  66
  67 /*! \cond libapi */
  68 /*! \addtogroup module_simd */
  69 /*! \{ */
  70
  71 /*! \name SIMD implementation data types
  72  * \{
  73  */
  74
  75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
  76  *
  77  * \note This variable cannot be placed inside other structures or classes, since
  78  *       some compilers (including at least clang-3.7) appear to lose the
  79  *       alignment. This is likely particularly severe when allocating such
  80  *       memory on the heap, but it occurs for stack structures too.
  81  */
  82 class SimdDouble
  83 {
  84 public:
  85     SimdDouble() {}
  86
  87     //! \brief Construct from scalar
  88     SimdDouble(double d) { simdInternal_.fill(d); }
  89
  90     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  91      *
  92      * This has to be public to enable usage in combination with static inline
  93      * functions, but it should never, EVER, be accessed by any code outside
  94      * the corresponding implementation directory since the type will depend
  95      * on the architecture.
  96      */
  97     std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
  98 };
  99
 100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
 101  *
 102  * Available if GMX_SIMD_HAVE_DOUBLE is 1.
 103  *
 104  * \note The integer SIMD type will always be available, but on architectures
 105  * that do not have any real integer SIMD support it might be defined as the
 106  * floating-point type. This will work fine, since there are separate defines
 107  * for whether the implementation can actually do any operations on integer
 108  * SIMD types.
 109  *
 110  * \note This variable cannot be placed inside other structures or classes, since
 111  *       some compilers (including at least clang-3.7) appear to lose the
 112  *       alignment. This is likely particularly severe when allocating such
 113  *       memory on the heap, but it occurs for stack structures too.
 114  */
 115 class SimdDInt32
 116 {
 117 public:
 118     SimdDInt32() {}
 119
 120     //! \brief Construct from scalar
 121     SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
 122
 123     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 124      *
 125      * This has to be public to enable usage in combination with static inline
 126      * functions, but it should never, EVER, be accessed by any code outside
 127      * the corresponding implementation directory since the type will depend
 128      * on the architecture.
 129      */
 130     std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
 131 };
 132
 133 /*! \libinternal \brief Boolean type for double SIMD data.
 134  *
 135  *  Available if GMX_SIMD_HAVE_DOUBLE is 1.
 136  *
 137  * \note This variable cannot be placed inside other structures or classes, since
 138  *       some compilers (including at least clang-3.7) appear to lose the
 139  *       alignment. This is likely particularly severe when allocating such
 140  *       memory on the heap, but it occurs for stack structures too.
 141  */
 142 class SimdDBool
 143 {
 144 public:
 145     SimdDBool() {}
 146
 147     //! \brief Construct from scalar bool
 148     SimdDBool(bool b) { simdInternal_.fill(b); }
 149
 150     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 151      *
 152      * This has to be public to enable usage in combination with static inline
 153      * functions, but it should never, EVER, be accessed by any code outside
 154      * the corresponding implementation directory since the type will depend
 155      * on the architecture.
 156      */
 157     std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
 158 };
 159
 160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
 161  *
 162  * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
 163  *
 164  * \note This variable cannot be placed inside other structures or classes, since
 165  *       some compilers (including at least clang-3.7) appear to lose the
 166  *       alignment. This is likely particularly severe when allocating such
 167  *       memory on the heap, but it occurs for stack structures too.
 168  */
 169 class SimdDIBool
 170 {
 171 public:
 172     SimdDIBool() {}
 173
 174     //! \brief Construct from scalar
 175     SimdDIBool(bool b) { simdInternal_.fill(b); }
 176
 177     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 178      *
 179      * This has to be public to enable usage in combination with static inline
 180      * functions, but it should never, EVER, be accessed by any code outside
 181      * the corresponding implementation directory since the type will depend
 182      * on the architecture.
 183      */
 184     std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
 185 };
 186
 187 /*! \}
 188  *
 189  * \name SIMD implementation load/store operations for double precision floating point
 190  * \{
 191  */
 192
 193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
 194  *
 195  * \param m Pointer to memory aligned to the SIMD width.
 196  * \return SIMD variable with data loaded.
 197  */
 198 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 199 {
 200     SimdDouble a;
 201
 202     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 203
 204     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 205     return a;
 206 }
 207
 208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
 209  *
 210  * \param[out] m Pointer to memory, aligned to SIMD width.
 211  * \param a SIMD variable to store
 212  */
 213 static inline void gmx_simdcall store(double* m, SimdDouble a)
 214 {
 215     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 216
 217     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 218 }
 219
 220 /*! \brief Load SIMD double from unaligned memory.
 221  *
 222  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 223  *
 224  * \param m Pointer to memory, no alignment requirement.
 225  * \return SIMD variable with data loaded.
 226  */
 227 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 228 {
 229     SimdDouble a;
 230     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 231     return a;
 232 }
 233
 234 /*! \brief Store SIMD double to unaligned memory.
 235  *
 236  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 237  *
 238  * \param[out] m Pointer to memory, no alignment requirement.
 239  * \param a SIMD variable to store.
 240  */
 241 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 242 {
 243     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 244 }
 245
 246 /*! \brief Set all SIMD double variable elements to 0.0.
 247  *
 248  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 249  * internally to handle all types rather than adding the suffix used here.
 250  *
 251  * \return SIMD 0.0
 252  */
 253 static inline SimdDouble gmx_simdcall setZeroD()
 254 {
 255     return SimdDouble(0.0);
 256 }
 257
 258 /*! \}
 259  *
 260  * \name SIMD implementation load/store operations for integers (corresponding to double)
 261  * \{
 262  */
 263
 264 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 265  *
 266  * You should typically just call \ref gmx::load(), which uses proxy objects
 267  * internally to handle all types rather than adding the suffix used here.
 268  *
 269  * \param m Pointer to memory, aligned to (double) integer SIMD width.
 270  * \return SIMD integer variable.
 271  */
 272 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 273 {
 274     SimdDInt32 a;
 275
 276     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 277
 278     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 279     return a;
 280 };
 281
 282 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 283  *
 284  * \param m Memory aligned to (double) integer SIMD width.
 285  * \param a SIMD (double) integer variable to store.
 286  */
 287 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
 288 {
 289     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(std::int32_t)) == 0);
 290
 291     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 292 };
 293
 294 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
 295  *
 296  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 297  * internally to handle all types rather than adding the suffix used here.
 298  *
 299  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 300  *
 301  * \param m Pointer to memory, no alignment requirements.
 302  * \return SIMD integer variable.
 303  */
 304 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 305 {
 306     SimdDInt32 a;
 307     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 308     return a;
 309 }
 310
 311 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 312  *
 313  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 314  *
 315  * \param m Memory pointer, no alignment requirements.
 316  * \param a SIMD (double) integer variable to store.
 317  */
 318 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 319 {
 320     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 321 }
 322
 323 /*! \brief Set all SIMD (double) integer variable elements to 0.
 324  *
 325  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 326  * internally to handle all types rather than adding the suffix used here.
 327  *
 328  * \return SIMD 0
 329  */
 330 static inline SimdDInt32 gmx_simdcall setZeroDI()
 331 {
 332     return SimdDInt32(0);
 333 }
 334
 335 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
 336  *
 337  * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
 338  *
 339  * \tparam index Compile-time constant, position to extract (first position is 0)
 340  * \param  a     SIMD variable from which to extract value.
 341  * \return Single integer from position index in SIMD variable.
 342  */
 343 template<int index>
 344 static inline std::int32_t gmx_simdcall extract(SimdDInt32 a)
 345 {
 346     return a.simdInternal_[index];
 347 }
 348
 349 /*! \}
 350  *
 351  * \name SIMD implementation double precision floating-point bitwise logical operations
 352  * \{
 353  */
 354
 355 /*! \brief Bitwise and for two SIMD double variables.
 356  *
 357  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 358  *
 359  * \param a data1
 360  * \param b data2
 361  * \return data1 & data2
 362  */
 363 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 364 {
 365     SimdDouble res;
 366
 367     union {
 368         double       r;
 369         std::int64_t i;
 370     } conv1, conv2;
 371
 372     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 373     {
 374         conv1.r              = a.simdInternal_[i];
 375         conv2.r              = b.simdInternal_[i];
 376         conv1.i              = conv1.i & conv2.i;
 377         res.simdInternal_[i] = conv1.r;
 378     }
 379     return res;
 380 }
 381
 382 /*! \brief Bitwise andnot for SIMD double.
 383  *
 384  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 385  *
 386  * \param a data1
 387  * \param b data2
 388  * \return (~data1) & data2
 389  */
 390 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 391 {
 392     SimdDouble res;
 393
 394     union {
 395         double       r;
 396         std::int64_t i;
 397     } conv1, conv2;
 398
 399     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 400     {
 401         conv1.r              = a.simdInternal_[i];
 402         conv2.r              = b.simdInternal_[i];
 403         conv1.i              = ~conv1.i & conv2.i;
 404         res.simdInternal_[i] = conv1.r;
 405     }
 406     return res;
 407 }
 408
 409 /*! \brief Bitwise or for SIMD double.
 410  *
 411  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 412  *
 413  * \param a data1
 414  * \param b data2
 415  * \return data1 | data2
 416  */
 417 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 418 {
 419     SimdDouble res;
 420
 421     union {
 422         double       r;
 423         std::int64_t i;
 424     } conv1, conv2;
 425
 426     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 427     {
 428         conv1.r              = a.simdInternal_[i];
 429         conv2.r              = b.simdInternal_[i];
 430         conv1.i              = conv1.i | conv2.i;
 431         res.simdInternal_[i] = conv1.r;
 432     }
 433     return res;
 434 }
 435
 436 /*! \brief Bitwise xor for SIMD double.
 437  *
 438  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 439  *
 440  * \param a data1
 441  * \param b data2
 442  * \return data1 ^ data2
 443  */
 444 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 445 {
 446     SimdDouble res;
 447
 448     union {
 449         double       r;
 450         std::int64_t i;
 451     } conv1, conv2;
 452
 453     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 454     {
 455         conv1.r              = a.simdInternal_[i];
 456         conv2.r              = b.simdInternal_[i];
 457         conv1.i              = conv1.i ^ conv2.i;
 458         res.simdInternal_[i] = conv1.r;
 459     }
 460     return res;
 461 }
 462
 463 /*! \}
 464  *
 465  * \name SIMD implementation double precision floating-point arithmetics
 466  * \{
 467  */
 468
 469 /*! \brief Add two double SIMD variables.
 470  *
 471  * \param a term1
 472  * \param b term2
 473  * \return a+b
 474  */
 475 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 476 {
 477     SimdDouble res;
 478
 479     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 480     {
 481         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 482     }
 483     return res;
 484 }
 485
 486 /*! \brief Subtract two double SIMD variables.
 487  *
 488  * \param a term1
 489  * \param b term2
 490  * \return a-b
 491  */
 492 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 493 {
 494     SimdDouble res;
 495
 496     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 497     {
 498         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 499     }
 500     return res;
 501 }
 502
 503 /*! \brief SIMD double precision negate.
 504  *
 505  * \param a SIMD double precision value
 506  * \return -a
 507  */
 508 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
 509 {
 510     SimdDouble res;
 511
 512     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 513     {
 514         res.simdInternal_[i] = -a.simdInternal_[i];
 515     }
 516     return res;
 517 }
 518
 519 /*! \brief Multiply two double SIMD variables.
 520  *
 521  * \param a factor1
 522  * \param b factor2
 523  * \return a*b.
 524  */
 525 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 526 {
 527     SimdDouble res;
 528
 529     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 530     {
 531         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 532     }
 533     return res;
 534 }
 535
 536 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
 537  *
 538  * \param a factor1
 539  * \param b factor2
 540  * \param c term
 541  * \return a*b+c
 542  */
 543 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 544 {
 545     return a * b + c;
 546 }
 547
 548 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
 549  *
 550  * \param a factor1
 551  * \param b factor2
 552  * \param c term
 553  * \return a*b-c
 554  */
 555 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 556 {
 557     return a * b - c;
 558 }
 559
 560 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
 561  *
 562  * \param a factor1
 563  * \param b factor2
 564  * \param c term
 565  * \return -a*b+c
 566  */
 567 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 568 {
 569     return c - a * b;
 570 }
 571
 572 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
 573  *
 574  * \param a factor1
 575  * \param b factor2
 576  * \param c term
 577  * \return -a*b-c
 578  */
 579 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 580 {
 581     return -a * b - c;
 582 }
 583
 584 /*! \brief double SIMD 1.0/sqrt(x) lookup.
 585  *
 586  * This is a low-level instruction that should only be called from routines
 587  * implementing the inverse square root in simd_math.h.
 588  *
 589  * \param x Argument, x>0
 590  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 591  */
 592 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 593 {
 594     SimdDouble res;
 595
 596     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 597     {
 598         // sic - we only use single precision for the lookup
 599         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 600     }
 601     return res;
 602 };
 603
 604 /*! \brief SIMD double 1.0/x lookup.
 605  *
 606  * This is a low-level instruction that should only be called from routines
 607  * implementing the reciprocal in simd_math.h.
 608  *
 609  * \param x Argument, x!=0
 610  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 611  */
 612 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 613 {
 614     SimdDouble res;
 615
 616     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 617     {
 618         // sic - we only use single precision for the lookup
 619         res.simdInternal_[i] = 1.0F / static_cast<float>(x.simdInternal_[i]);
 620     }
 621     return res;
 622 };
 623
 624 /*! \brief Add two double SIMD variables, masked version.
 625  *
 626  * \param a term1
 627  * \param b term2
 628  * \param m mask
 629  * \return a+b where mask is true, 0.0 otherwise.
 630  */
 631 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 632 {
 633     SimdDouble res;
 634
 635     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 636     {
 637         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
 638     }
 639     return res;
 640 }
 641
 642 /*! \brief Multiply two double SIMD variables, masked version.
 643  *
 644  * \param a factor1
 645  * \param b factor2
 646  * \param m mask
 647  * \return a*b where mask is true, 0.0 otherwise.
 648  */
 649 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 650 {
 651     SimdDouble res;
 652
 653     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 654     {
 655         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
 656     }
 657     return res;
 658 }
 659
 660 /*! \brief SIMD double fused multiply-add, masked version.
 661  *
 662  * \param a factor1
 663  * \param b factor2
 664  * \param c term
 665  * \param m mask
 666  * \return a*b+c where mask is true, 0.0 otherwise.
 667  */
 668 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 669 {
 670     SimdDouble res;
 671
 672     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 673     {
 674         res.simdInternal_[i] =
 675                 m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
 676     }
 677     return res;
 678 }
 679
 680 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
 681  *
 682  * This is a low-level instruction that should only be called from routines
 683  * implementing the inverse square root in simd_math.h.
 684  *
 685  * \param x Argument, x>0 for entries where mask is true.
 686  * \param m Mask
 687  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 688  *         The result for masked-out entries will be 0.0.
 689  */
 690 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 691 {
 692     SimdDouble res;
 693
 694     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 695     {
 696         // sic - we only use single precision for the lookup
 697         res.simdInternal_[i] = (m.simdInternal_[i] != 0)
 698                                        ? 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]))
 699                                        : 0.0;
 700     }
 701     return res;
 702 }
 703
 704 /*! \brief SIMD double 1.0/x lookup, masked version.
 705  *
 706  * This is a low-level instruction that should only be called from routines
 707  * implementing the reciprocal in simd_math.h.
 708  *
 709  * \param x Argument, x>0 for entries where mask is true.
 710  * \param m Mask
 711  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 712  *         The result for masked-out entries will be 0.0.
 713  */
 714 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 715 {
 716     SimdDouble res;
 717
 718     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 719     {
 720         res.simdInternal_[i] =
 721                 (m.simdInternal_[i] != 0) ? 1.0F / static_cast<float>(x.simdInternal_[i]) : 0.0;
 722     }
 723     return res;
 724 }
 725
 726 /*! \brief SIMD double floating-point fabs().
 727  *
 728  * \param a any floating point values
 729  * \return fabs(a) for each element.
 730  */
 731 static inline SimdDouble gmx_simdcall abs(SimdDouble a)
 732 {
 733     SimdDouble res;
 734
 735     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 736     {
 737         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 738     }
 739     return res;
 740 }
 741
 742 /*! \brief Set each SIMD double element to the largest from two variables.
 743  *
 744  * \param a Any floating-point value
 745  * \param b Any floating-point value
 746  * \return max(a,b) for each element.
 747  */
 748 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 749 {
 750     SimdDouble res;
 751
 752     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 753     {
 754         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 755     }
 756     return res;
 757 }
 758
 759 /*! \brief Set each SIMD double element to the smallest from two variables.
 760  *
 761  * \param a Any floating-point value
 762  * \param b Any floating-point value
 763  * \return min(a,b) for each element.
 764  */
 765 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 766 {
 767     SimdDouble res;
 768
 769     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 770     {
 771         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 772     }
 773     return res;
 774 }
 775
 776 /*! \brief SIMD double round to nearest integer value (in floating-point format).
 777  *
 778  * \param a Any floating-point value
 779  * \return The nearest integer, represented in floating-point format.
 780  *
 781  * \note Round mode is implementation defined. The only guarantee is that it
 782  * is consistent between rounding functions (round, cvtR2I).
 783  */
 784 static inline SimdDouble gmx_simdcall round(SimdDouble a)
 785 {
 786     SimdDouble res;
 787
 788     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 789     {
 790         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 791     }
 792     return res;
 793 }
 794
 795 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
 796  *
 797  * \param a Any floating-point value
 798  * \return Integer rounded towards zero, represented in floating-point format.
 799  *
 800  * \note This is truncation towards zero, not floor(). The reason for this
 801  * is that truncation is virtually always present as a dedicated hardware
 802  * instruction, but floor() frequently isn't.
 803  */
 804 static inline SimdDouble gmx_simdcall trunc(SimdDouble a)
 805 {
 806     SimdDouble res;
 807
 808     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 809     {
 810         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 811     }
 812     return res;
 813 }
 814
 815 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
 816  *
 817  * \tparam      opt       By default this function behaves like the standard
 818  *                        library such that frexp(+-0,exp) returns +-0 and
 819  *                        stores 0 in the exponent when value is 0. If you
 820  *                        know the argument is always nonzero, you can set
 821  *                        the template parameter to MathOptimization::Unsafe
 822  *                        to make it slightly faster.
 823  *
 824  * \param       value     Floating-point value to extract from
 825  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 826  * \return      Fraction of value, floating-point SIMD format.
 827  */
 828 template<MathOptimization opt = MathOptimization::Safe>
 829 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
 830 {
 831     SimdDouble fraction;
 832
 833     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 834     {
 835         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 836     }
 837     return fraction;
 838 }
 839
 840 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
 841  *
 842  * \tparam opt By default, this routine will return zero for input arguments
 843  *             that are so small they cannot be reproduced in the current
 844  *             precision. If the unsafe math optimization template parameter
 845  *             setting is used, these tests are skipped, and the result will
 846  *             be undefined (possible even NaN). This might happen below -127
 847  *             in single precision or -1023 in double, although some
 848  *             might use denormal support to extend the range.
 849  *
 850  * \param value Floating-point number to multiply with new exponent
 851  * \param exponent Integer that will not overflow as 2^exponent.
 852  * \return value*2^exponent
 853  */
 854 template<MathOptimization opt = MathOptimization::Safe>
 855 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
 856 {
 857     SimdDouble res;
 858
 859     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 860     {
 861         // std::ldexp already takes care of clamping arguments, so we do not
 862         // need to do anything in the reference implementation
 863         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 864     }
 865     return res;
 866 }
 867
 868 /*! \brief Return sum of all elements in SIMD double variable.
 869  *
 870  * \param a SIMD variable to reduce/sum.
 871  * \return The sum of all elements in the argument variable.
 872  *
 873  */
 874 static inline double gmx_simdcall reduce(SimdDouble a)
 875 {
 876     double sum = 0.0;
 877
 878     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 879     {
 880         sum += a.simdInternal_[i];
 881     }
 882     return sum;
 883 }
 884
 885 /*! \}
 886  *
 887  * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 888  * \{
 889  */
 890
 891 /*! \brief SIMD a==b for double SIMD.
 892  *
 893  * \param a value1
 894  * \param b value2
 895  * \return Each element of the boolean will be set to true if a==b.
 896  *
 897  * Beware that exact floating-point comparisons are difficult.
 898  */
 899 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 900 {
 901     SimdDBool res;
 902
 903     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 904     {
 905         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 906     }
 907     return res;
 908 }
 909
 910 /*! \brief SIMD a!=b for double SIMD.
 911  *
 912  * \param a value1
 913  * \param b value2
 914  * \return Each element of the boolean will be set to true if a!=b.
 915  *
 916  * Beware that exact floating-point comparisons are difficult.
 917  */
 918 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 919 {
 920     SimdDBool res;
 921
 922     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 923     {
 924         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 925     }
 926     return res;
 927 }
 928
 929 /*! \brief SIMD a<b for double SIMD.
 930  *
 931  * \param a value1
 932  * \param b value2
 933  * \return Each element of the boolean will be set to true if a<b.
 934  */
 935 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 936 {
 937     SimdDBool res;
 938
 939     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 940     {
 941         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 942     }
 943     return res;
 944 }
 945
 946 /*! \brief SIMD a<=b for double SIMD.
 947  *
 948  * \param a value1
 949  * \param b value2
 950  * \return Each element of the boolean will be set to true if a<=b.
 951  */
 952 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 953 {
 954     SimdDBool res;
 955
 956     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 957     {
 958         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 959     }
 960     return res;
 961 }
 962
 963 /*! \brief Return true if any bits are set in the single precision SIMD.
 964  *
 965  * This function is used to handle bitmasks, mainly for exclusions in the
 966  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 967  * so it is not identical to not-equal.
 968  *
 969  * \param a value
 970  * \return Each element of the boolean will be true if any bit in a is nonzero.
 971  */
 972 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 973 {
 974     SimdDBool res;
 975
 976     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 977     {
 978         union {
 979             std::uint64_t i;
 980             double        d;
 981         } conv;
 982
 983         conv.d               = a.simdInternal_[i];
 984         res.simdInternal_[i] = (conv.i != 0);
 985     }
 986     return res;
 987 }
 988
 989 /*! \brief Logical \a and on double precision SIMD booleans.
 990  *
 991  * \param a logical vars 1
 992  * \param b logical vars 2
 993  * \return For each element, the result boolean is true if a \& b are true.
 994  *
 995  * \note This is not necessarily a bitwise operation - the storage format
 996  * of booleans is implementation-dependent.
 997  */
 998 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
 999 {
1000     SimdDBool res;
1001
1002     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1003     {
1004         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1005     }
1006     return res;
1007 }
1008
1009 /*! \brief Logical \a or on double precision SIMD booleans.
1010  *
1011  * \param a logical vars 1
1012  * \param b logical vars 2
1013  * \return For each element, the result boolean is true if a or b is true.
1014  *
1015  * Note that this is not necessarily a bitwise operation - the storage format
1016  * of booleans is implementation-dependent.
1017  *
1018  \ */
1019 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
1020 {
1021     SimdDBool res;
1022
1023     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1024     {
1025         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1026     }
1027     return res;
1028 }
1029
1030 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1031  *
1032  * \param a Logical variable.
1033  * \return true if any element in a is true, otherwise false.
1034  *
1035  * The actual return value for truth will depend on the architecture,
1036  * so any non-zero value is considered truth.
1037  */
1038 static inline bool gmx_simdcall anyTrue(SimdDBool a)
1039 {
1040     bool res = false;
1041
1042     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1043     {
1044         res = res || a.simdInternal_[i];
1045     }
1046     return res;
1047 }
1048
1049 /*! \brief Select from double precision SIMD variable where boolean is true.
1050  *
1051  * \param a Floating-point variable to select from
1052  * \param mask Boolean selector
1053  * \return  For each element, a is selected for true, 0 for false.
1054  */
1055 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool mask)
1056 {
1057     SimdDouble res;
1058
1059     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1060     {
1061         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1062     }
1063     return res;
1064 }
1065
1066 /*! \brief Select from double precision SIMD variable where boolean is false.
1067  *
1068  * \param a Floating-point variable to select from
1069  * \param mask Boolean selector
1070  * \return  For each element, a is selected for false, 0 for true (sic).
1071  */
1072 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool mask)
1073 {
1074     SimdDouble res;
1075
1076     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1077     {
1078         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1079     }
1080     return res;
1081 }
1082
1083 /*! \brief Vector-blend SIMD double selection.
1084  *
1085  * \param a First source
1086  * \param b Second source
1087  * \param sel Boolean selector
1088  * \return For each element, select b if sel is true, a otherwise.
1089  */
1090 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1091 {
1092     SimdDouble res;
1093
1094     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1095     {
1096         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1097     }
1098     return res;
1099 }
1100
1101 /*! \}
1102  *
1103  * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1104  * \{
1105  */
1106
1107 /*! \brief Integer SIMD bitwise and.
1108  *
1109  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1110  *
1111  * \note You can \a not use this operation directly to select based on a boolean
1112  * SIMD variable, since booleans are separate from integer SIMD. If that
1113  * is what you need, have a look at \ref gmx::selectByMask instead.
1114  *
1115  * \param a first integer SIMD
1116  * \param b second integer SIMD
1117  * \return a \& b (bitwise and)
1118  */
1119 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
1120 {
1121     SimdDInt32 res;
1122
1123     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1124     {
1125         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1126     }
1127     return res;
1128 }
1129
1130 /*! \brief Integer SIMD bitwise not/complement.
1131  *
1132  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1133  *
1134  * \note You can \a not use this operation directly to select based on a boolean
1135  * SIMD variable, since booleans are separate from integer SIMD. If that
1136  * is what you need, have a look at \ref gmx::selectByMask instead.
1137  *
1138  * \param a integer SIMD
1139  * \param b integer SIMD
1140  * \return (~a) & b
1141  */
1142 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
1143 {
1144     SimdDInt32 res;
1145
1146     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1147     {
1148         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1149     }
1150     return res;
1151 }
1152
1153 /*! \brief Integer SIMD bitwise or.
1154  *
1155  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1156  *
1157  * \param a first integer SIMD
1158  * \param b second integer SIMD
1159  * \return a \| b (bitwise or)
1160  */
1161 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
1162 {
1163     SimdDInt32 res;
1164
1165     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1166     {
1167         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1168     }
1169     return res;
1170 }
1171
1172 /*! \brief Integer SIMD bitwise xor.
1173  *
1174  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1175  *
1176  * \param a first integer SIMD
1177  * \param b second integer SIMD
1178  * \return a ^ b (bitwise xor)
1179  */
1180 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
1181 {
1182     SimdDInt32 res;
1183
1184     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1185     {
1186         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1187     }
1188     return res;
1189 }
1190
1191 /*! \}
1192  *
1193  * \name SIMD implementation integer (corresponding to double) arithmetics
1194  * \{
1195  */
1196
1197 /*! \brief Add SIMD integers.
1198  *
1199  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1200  *
1201  * \param a term1
1202  * \param b term2
1203  * \return a+b
1204  */
1205 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
1206 {
1207     SimdDInt32 res;
1208
1209     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1210     {
1211         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1212     }
1213     return res;
1214 }
1215
1216 /*! \brief Subtract SIMD integers.
1217  *
1218  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1219  *
1220  * \param a term1
1221  * \param b term2
1222  * \return a-b
1223  */
1224 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
1225 {
1226     SimdDInt32 res;
1227
1228     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1229     {
1230         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1231     }
1232     return res;
1233 }
1234
1235 /*! \brief Multiply SIMD integers.
1236  *
1237  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1238  *
1239  * \param a factor1
1240  * \param b factor2
1241  * \return a*b.
1242  *
1243  * \note Only the low 32 bits are retained, so this can overflow.
1244  */
1245 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
1246 {
1247     SimdDInt32 res;
1248
1249     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1250     {
1251         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1252     }
1253     return res;
1254 }
1255
1256 /*! \}
1257  *
1258  * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1259  * \{
1260  */
1261
1262 /*! \brief Equality comparison of two integers corresponding to double values.
1263  *
1264  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1265  *
1266  * \param a SIMD integer1
1267  * \param b SIMD integer2
1268  * \return SIMD integer boolean with true for elements where a==b
1269  */
1270 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
1271 {
1272     SimdDIBool res;
1273
1274     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1275     {
1276         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1277     }
1278     return res;
1279 }
1280
1281 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1282  *
1283  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1284  *
1285  * \param a SIMD integer1
1286  * \param b SIMD integer2
1287  * \return SIMD integer boolean with true for elements where a<b
1288  */
1289 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
1290 {
1291     SimdDIBool res;
1292
1293     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1294     {
1295         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1296     }
1297     return res;
1298 }
1299
1300 /*! \brief Check if any bit is set in each element
1301  *
1302  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1303  *
1304  * \param a SIMD integer
1305  * \return SIMD integer boolean with true for elements where any bit is set
1306  */
1307 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
1308 {
1309     SimdDIBool res;
1310
1311     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1312     {
1313         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1314     }
1315     return res;
1316 }
1317
1318 /*! \brief Logical AND on SimdDIBool.
1319  *
1320  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1321  *
1322  * \param a SIMD boolean 1
1323  * \param b SIMD boolean 2
1324  * \return True for elements where both a and b are true.
1325  */
1326 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
1327 {
1328     SimdDIBool res;
1329
1330     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1331     {
1332         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1333     }
1334     return res;
1335 }
1336
1337 /*! \brief Logical OR on SimdDIBool.
1338  *
1339  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1340  *
1341  * \param a SIMD boolean 1
1342  * \param b SIMD boolean 2
1343  * \return True for elements where both a and b are true.
1344  */
1345 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
1346 {
1347     SimdDIBool res;
1348
1349     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1350     {
1351         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1352     }
1353     return res;
1354 }
1355
1356 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1357  *
1358  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1359  *
1360  * The actual return value for "any true" will depend on the architecture.
1361  * Any non-zero value should be considered truth.
1362  *
1363  * \param a SIMD boolean
1364  * \return True if any of the elements in a is true, otherwise 0.
1365  */
1366 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
1367 {
1368     bool res = false;
1369
1370     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1371     {
1372         res = res || a.simdInternal_[i];
1373     }
1374     return res;
1375 }
1376
1377 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1378  *
1379  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1380  *
1381  * \param a SIMD integer to select from
1382  * \param mask Boolean selector
1383  * \return Elements from a where sel is true, 0 otherwise.
1384  */
1385 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool mask)
1386 {
1387     SimdDInt32 res;
1388
1389     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1390     {
1391         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1392     }
1393     return res;
1394 }
1395
1396 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1397  *
1398  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1399  *
1400  * \param a SIMD integer to select from
1401  * \param mask Boolean selector
1402  * \return Elements from a where sel is false, 0 otherwise (sic).
1403  */
1404 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1405 {
1406     SimdDInt32 res;
1407
1408     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1409     {
1410         res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1411     }
1412     return res;
1413 }
1414
1415 /*! \brief Vector-blend SIMD integer selection.
1416  *
1417  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1418  *
1419  * \param a First source
1420  * \param b Second source
1421  * \param sel Boolean selector
1422  * \return For each element, select b if sel is true, a otherwise.
1423  */
1424 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1425 {
1426     SimdDInt32 res;
1427
1428     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1429     {
1430         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1431     }
1432     return res;
1433 }
1434
1435 /*! \}
1436  *
1437  * \name SIMD implementation conversion operations
1438  * \{
1439  */
1440
1441 /*! \brief Round double precision floating point to integer.
1442  *
1443  * \param a SIMD floating-point
1444  * \return SIMD integer, rounded to nearest integer.
1445  *
1446  * \note Round mode is implementation defined. The only guarantee is that it
1447  * is consistent between rounding functions (round, cvtR2I).
1448  */
1449 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
1450 {
1451     SimdDInt32 b;
1452
1453     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1454     {
1455         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1456     }
1457     return b;
1458 };
1459
1460 /*! \brief Truncate double precision floating point to integer.
1461  *
1462  * \param a SIMD floating-point
1463  * \return SIMD integer, truncated to nearest integer.
1464  */
1465 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
1466 {
1467     SimdDInt32 b;
1468
1469     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1470     {
1471         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1472     }
1473     return b;
1474 };
1475
1476 /*! \brief Convert integer to double precision floating point.
1477  *
1478  * \param a SIMD integer
1479  * \return SIMD floating-point
1480  */
1481 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
1482 {
1483     SimdDouble b;
1484
1485     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1486     {
1487         b.simdInternal_[i] = a.simdInternal_[i];
1488     }
1489     return b;
1490 };
1491
1492 /*! \brief Convert from double precision boolean to corresponding integer boolean
1493  *
1494  * \param a SIMD floating-point boolean
1495  * \return SIMD integer boolean
1496  */
1497 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
1498 {
1499     SimdDIBool b;
1500
1501     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1502     {
1503         b.simdInternal_[i] = a.simdInternal_[i];
1504     }
1505     return b;
1506 };
1507
1508 /*! \brief Convert from integer boolean to corresponding double precision boolean
1509  *
1510  * \param a SIMD integer boolean
1511  * \return SIMD floating-point boolean
1512  */
1513 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
1514 {
1515     SimdDBool b;
1516
1517     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1518     {
1519         b.simdInternal_[i] = a.simdInternal_[i];
1520     }
1521     return b;
1522 };
1523
1524 /*! \brief Convert SIMD float to double.
1525  *
1526  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1527  * \ref GMX_SIMD_DOUBLE_WIDTH.
1528  *
1529  * Float/double conversions are complex since the SIMD width could either
1530  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1531  * need to check for the width in the code, and have different code paths.
1532  *
1533  * \param f Single-precision SIMD variable
1534  * \return Double-precision SIMD variable of the same width
1535  */
1536 static inline SimdDouble gmx_simdcall cvtF2D(SimdFloat gmx_unused f)
1537 {
1538 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1539     SimdDouble d;
1540     for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1541     {
1542         d.simdInternal_[i] = f.simdInternal_[i];
1543     }
1544     return d;
1545 #else
1546     gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1547 #endif
1548 }
1549
1550 /*! \brief Convert SIMD double to float.
1551  *
1552  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1553  * \ref GMX_SIMD_DOUBLE_WIDTH.
1554  *
1555  * Float/double conversions are complex since the SIMD width could either
1556  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1557  * need to check for the width in the code, and have different code paths.
1558  *
1559  * \param d Double-precision SIMD variable
1560  * \return Single-precision SIMD variable of the same width
1561  */
1562 static inline SimdFloat gmx_simdcall cvtD2F(SimdDouble gmx_unused d)
1563 {
1564 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1565     SimdFloat f;
1566     for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1567     {
1568         f.simdInternal_[i] = d.simdInternal_[i];
1569     }
1570     return f;
1571 #else
1572     gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1573 #endif
1574 }
1575
1576 /*! \brief Convert SIMD float to double.
1577  *
1578  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1579  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1580  *
1581  * Float/double conversions are complex since the SIMD width could either
1582  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1583  * need to check for the width in the code, and have different code paths.
1584  *
1585  * \param f Single-precision SIMD variable
1586  * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1587  * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1588  */
1589 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
1590                                         SimdDouble gmx_unused* d0,
1591                                         SimdDouble gmx_unused* d1)
1592 {
1593 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1594     for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1595     {
1596         d0->simdInternal_[i] = f.simdInternal_[i];
1597         d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size() / 2 + i];
1598     }
1599 #else
1600     gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1601 #endif
1602 }
1603
1604 /*! \brief Convert SIMD double to float.
1605  *
1606  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1607  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1608  *
1609  * Float/double conversions are complex since the SIMD width could either
1610  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1611  * need to check for the width in the code, and have different code paths.
1612  *
1613  * \param d0 Double-precision SIMD variable, first half of values to put in f.
1614  * \param d1 Double-precision SIMD variable, second half of values to put in f.
1615  * \return Single-precision SIMD variable with all values.
1616  */
1617 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1618 {
1619 #if (GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH)
1620     SimdFloat f;
1621     for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1622     {
1623         f.simdInternal_[i]                              = d0.simdInternal_[i];
1624         f.simdInternal_[f.simdInternal_.size() / 2 + i] = d1.simdInternal_[i];
1625     }
1626     return f;
1627 #else
1628     gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1629 #endif
1630 }
1631
1632 /*! \} */
1633
1634 /*! \} */
1635 /*! \endcond */
1636
1637 } // namespace gmx
1638
1639 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H