src/gromacs/simd/impl_reference/impl_reference_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "gromacs/math/utilities.h"
#include "gromacs/utility/fatalerror.h"

#include "impl_reference_definitions.h"
#include "impl_reference_simd_float.h"
  63
  64 namespace gmx
  65 {
  66
  67 /*! \cond libapi */
  68 /*! \addtogroup module_simd */
  69 /*! \{ */
  70
  71 /* \name SIMD implementation data types
  72  * \{
  73  */
  74
  75 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
  76  *
  77  * \note This variable cannot be placed inside other structures or classes, since
  78  *       some compilers (including at least clang-3.7) appear to lose the
  79  *       alignment. This is likely particularly severe when allocating such
  80  *       memory on the heap, but it occurs for stack structures too.
  81  */
  82 class SimdDouble
  83 {
  84     public:
  85         SimdDouble() {}
  86
  87         //! \brief Construct from scalar
  88         SimdDouble(double d) { simdInternal_.fill(d); }
  89
  90         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  91          *
  92          * This has to be public to enable usage in combination with static inline
  93          * functions, but it should never, EVER, be accessed by any code outside
  94          * the corresponding implementation directory since the type will depend
  95          * on the architecture.
  96          */
  97         std::array<double, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
  98 };
  99
 100 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
 101  *
 102  * Available if GMX_SIMD_HAVE_DOUBLE is 1.
 103  *
 104  * \note The integer SIMD type will always be available, but on architectures
 105  * that do not have any real integer SIMD support it might be defined as the
 106  * floating-point type. This will work fine, since there are separate defines
 107  * for whether the implementation can actually do any operations on integer
 108  * SIMD types.
 109  *
 110  * \note This variable cannot be placed inside other structures or classes, since
 111  *       some compilers (including at least clang-3.7) appear to lose the
 112  *       alignment. This is likely particularly severe when allocating such
 113  *       memory on the heap, but it occurs for stack structures too.
 114  */
 115 class SimdDInt32
 116 {
 117     public:
 118         SimdDInt32() {}
 119
 120         //! \brief Construct from scalar
 121         SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
 122
 123         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 124          *
 125          * This has to be public to enable usage in combination with static inline
 126          * functions, but it should never, EVER, be accessed by any code outside
 127          * the corresponding implementation directory since the type will depend
 128          * on the architecture.
 129          */
 130         std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 131 };
 132
 133 /*! \libinternal \brief Boolean type for double SIMD data.
 134  *
 135  *  Available if GMX_SIMD_HAVE_DOUBLE is 1.
 136  *
 137  * \note This variable cannot be placed inside other structures or classes, since
 138  *       some compilers (including at least clang-3.7) appear to lose the
 139  *       alignment. This is likely particularly severe when allocating such
 140  *       memory on the heap, but it occurs for stack structures too.
 141  */
 142 class SimdDBool
 143 {
 144     public:
 145         SimdDBool() {}
 146
 147         //! \brief Construct from scalar bool
 148         SimdDBool(bool b) { simdInternal_.fill(b); }
 149
 150         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 151          *
 152          * This has to be public to enable usage in combination with static inline
 153          * functions, but it should never, EVER, be accessed by any code outside
 154          * the corresponding implementation directory since the type will depend
 155          * on the architecture.
 156          */
 157         std::array<bool, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
 158 };
 159
 160 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
 161  *
 162  * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
 163  *
 164  * \note This variable cannot be placed inside other structures or classes, since
 165  *       some compilers (including at least clang-3.7) appear to lose the
 166  *       alignment. This is likely particularly severe when allocating such
 167  *       memory on the heap, but it occurs for stack structures too.
 168  */
 169 class SimdDIBool
 170 {
 171     public:
 172         SimdDIBool() {}
 173
 174         //! \brief Construct from scalar
 175         SimdDIBool(bool b) { simdInternal_.fill(b); }
 176
 177         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 178          *
 179          * This has to be public to enable usage in combination with static inline
 180          * functions, but it should never, EVER, be accessed by any code outside
 181          * the corresponding implementation directory since the type will depend
 182          * on the architecture.
 183          */
 184         std::array<bool, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 185 };
 186
 187 /*! \}
 188  *
 189  * \name SIMD implementation load/store operations for double precision floating point
 190  * \{
 191  */
 192
 193 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
 194  *
 195  * \param m Pointer to memory aligned to the SIMD width.
 196  * \return SIMD variable with data loaded.
 197  */
 198 static inline SimdDouble gmx_simdcall
 199 simdLoad(const double *m)
 200 {
 201     SimdDouble a;
 202
 203     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 204
 205     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 206     return a;
 207 }
 208
 209 /*! \brief Store the contents of SIMD double variable to aligned memory m.
 210  *
 211  * \param[out] m Pointer to memory, aligned to SIMD width.
 212  * \param a SIMD variable to store
 213  */
 214 static inline void gmx_simdcall
 215 store(double *m, SimdDouble a)
 216 {
 217     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 218
 219     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 220 }
 221
 222 /*! \brief Load SIMD double from unaligned memory.
 223  *
 224  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 225  *
 226  * \param m Pointer to memory, no alignment requirement.
 227  * \return SIMD variable with data loaded.
 228  */
 229 static inline SimdDouble gmx_simdcall
 230 simdLoadU(const double *m)
 231 {
 232     SimdDouble a;
 233     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 234     return a;
 235 }
 236
 237 /*! \brief Store SIMD double to unaligned memory.
 238  *
 239  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 240  *
 241  * \param[out] m Pointer to memory, no alignment requirement.
 242  * \param a SIMD variable to store.
 243  */
 244 static inline void gmx_simdcall
 245 storeU(double *m, SimdDouble a)
 246 {
 247     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 248 }
 249
 250 /*! \brief Set all SIMD double variable elements to 0.0.
 251  *
 252  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 253  * internally to handle all types rather than adding the suffix used here.
 254  *
 255  * \return SIMD 0.0
 256  */
 257 static inline SimdDouble gmx_simdcall
 258 setZeroD()
 259 {
 260     return SimdDouble(0.0);
 261 }
 262
 263 /*! \}
 264  *
 265  * \name SIMD implementation load/store operations for integers (corresponding to double)
 266  * \{
 267  */
 268
 269 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 270  *
 271  * You should typically just call \ref gmx::load(), which uses proxy objects
 272  * internally to handle all types rather than adding the suffix used here.
 273  *
 274  * \param m Pointer to memory, aligned to (double) integer SIMD width.
 275  * \return SIMD integer variable.
 276  */
 277 static inline SimdDInt32 gmx_simdcall
 278 simdLoadDI(const std::int32_t * m)
 279 {
 280     SimdDInt32 a;
 281
 282     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 283
 284     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 285     return a;
 286 };
 287
 288 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 289  *
 290  * \param m Memory aligned to (double) integer SIMD width.
 291  * \param a SIMD (double) integer variable to store.
 292  */
 293 static inline void gmx_simdcall
 294 store(std::int32_t * m, SimdDInt32 a)
 295 {
 296     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 297
 298     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 299 };
 300
 301 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
 302  *
 303  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 304  * internally to handle all types rather than adding the suffix used here.
 305  *
 306  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 307  *
 308  * \param m Pointer to memory, no alignment requirements.
 309  * \return SIMD integer variable.
 310  */
 311 static inline SimdDInt32 gmx_simdcall
 312 simdLoadUDI(const std::int32_t *m)
 313 {
 314     SimdDInt32 a;
 315     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 316     return a;
 317 }
 318
 319 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 320  *
 321  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 322  *
 323  * \param m Memory pointer, no alignment requirements.
 324  * \param a SIMD (double) integer variable to store.
 325  */
 326 static inline void gmx_simdcall
 327 storeU(std::int32_t * m, SimdDInt32 a)
 328 {
 329     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 330 }
 331
 332 /*! \brief Set all SIMD (double) integer variable elements to 0.
 333  *
 334  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 335  * internally to handle all types rather than adding the suffix used here.
 336  *
 337  * \return SIMD 0
 338  */
 339 static inline SimdDInt32 gmx_simdcall
 340 setZeroDI()
 341 {
 342     return SimdDInt32(0);
 343 }
 344
 345 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
 346  *
 347  * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
 348  *
 349  * \tparam index Compile-time constant, position to extract (first position is 0)
 350  * \param  a     SIMD variable from which to extract value.
 351  * \return Single integer from position index in SIMD variable.
 352  */
 353 template<int index>
 354 static inline std::int32_t gmx_simdcall
 355 extract(SimdDInt32 a)
 356 {
 357     return a.simdInternal_[index];
 358 }
 359
 360 /*! \}
 361  *
 362  * \name SIMD implementation double precision floating-point bitwise logical operations
 363  * \{
 364  */
 365
 366 /*! \brief Bitwise and for two SIMD double variables.
 367  *
 368  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 369  *
 370  * \param a data1
 371  * \param b data2
 372  * \return data1 & data2
 373  */
 374 static inline SimdDouble gmx_simdcall
 375 operator&(SimdDouble a, SimdDouble b)
 376 {
 377     SimdDouble         res;
 378
 379     union
 380     {
 381         double        r;
 382         std::int64_t  i;
 383     }
 384     conv1, conv2;
 385
 386     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 387     {
 388         conv1.r              = a.simdInternal_[i];
 389         conv2.r              = b.simdInternal_[i];
 390         conv1.i              = conv1.i & conv2.i;
 391         res.simdInternal_[i] = conv1.r;
 392     }
 393     return res;
 394 }
 395
 396 /*! \brief Bitwise andnot for SIMD double.
 397  *
 398  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 399  *
 400  * \param a data1
 401  * \param b data2
 402  * \return (~data1) & data2
 403  */
 404 static inline SimdDouble gmx_simdcall
 405 andNot(SimdDouble a, SimdDouble b)
 406 {
 407     SimdDouble         res;
 408
 409     union
 410     {
 411         double        r;
 412         std::int64_t  i;
 413     }
 414     conv1, conv2;
 415
 416     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 417     {
 418         conv1.r              = a.simdInternal_[i];
 419         conv2.r              = b.simdInternal_[i];
 420         conv1.i              = ~conv1.i & conv2.i;
 421         res.simdInternal_[i] = conv1.r;
 422     }
 423     return res;
 424 }
 425
 426 /*! \brief Bitwise or for SIMD double.
 427  *
 428  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 429  *
 430  * \param a data1
 431  * \param b data2
 432  * \return data1 | data2
 433  */
 434 static inline SimdDouble gmx_simdcall
 435 operator|(SimdDouble a, SimdDouble b)
 436 {
 437     SimdDouble         res;
 438
 439     union
 440     {
 441         double        r;
 442         std::int64_t  i;
 443     }
 444     conv1, conv2;
 445
 446     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 447     {
 448         conv1.r              = a.simdInternal_[i];
 449         conv2.r              = b.simdInternal_[i];
 450         conv1.i              = conv1.i | conv2.i;
 451         res.simdInternal_[i] = conv1.r;
 452     }
 453     return res;
 454 }
 455
 456 /*! \brief Bitwise xor for SIMD double.
 457  *
 458  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 459  *
 460  * \param a data1
 461  * \param b data2
 462  * \return data1 ^ data2
 463  */
 464 static inline SimdDouble gmx_simdcall
 465 operator^(SimdDouble a, SimdDouble b)
 466 {
 467     SimdDouble         res;
 468
 469     union
 470     {
 471         double        r;
 472         std::int64_t  i;
 473     }
 474     conv1, conv2;
 475
 476     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 477     {
 478         conv1.r              = a.simdInternal_[i];
 479         conv2.r              = b.simdInternal_[i];
 480         conv1.i              = conv1.i ^ conv2.i;
 481         res.simdInternal_[i] = conv1.r;
 482     }
 483     return res;
 484 }
 485
 486 /*! \}
 487  *
 488  * \name SIMD implementation double precision floating-point arithmetics
 489  * \{
 490  */
 491
 492 /*! \brief Add two double SIMD variables.
 493  *
 494  * \param a term1
 495  * \param b term2
 496  * \return a+b
 497  */
 498 static inline SimdDouble gmx_simdcall
 499 operator+(SimdDouble a, SimdDouble b)
 500 {
 501     SimdDouble         res;
 502
 503     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 504     {
 505         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 506     }
 507     return res;
 508 }
 509
 510 /*! \brief Subtract two double SIMD variables.
 511  *
 512  * \param a term1
 513  * \param b term2
 514  * \return a-b
 515  */
 516 static inline SimdDouble gmx_simdcall
 517 operator-(SimdDouble a, SimdDouble b)
 518 {
 519     SimdDouble         res;
 520
 521     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 522     {
 523         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 524     }
 525     return res;
 526 }
 527
 528 /*! \brief SIMD double precision negate.
 529  *
 530  * \param a SIMD double precision value
 531  * \return -a
 532  */
 533 static inline SimdDouble gmx_simdcall
 534 operator-(SimdDouble a)
 535 {
 536     SimdDouble         res;
 537
 538     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 539     {
 540         res.simdInternal_[i] = -a.simdInternal_[i];
 541     }
 542     return res;
 543 }
 544
 545 /*! \brief Multiply two double SIMD variables.
 546  *
 547  * \param a factor1
 548  * \param b factor2
 549  * \return a*b.
 550  */
 551 static inline SimdDouble gmx_simdcall
 552 operator*(SimdDouble a, SimdDouble b)
 553 {
 554     SimdDouble         res;
 555
 556     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 557     {
 558         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 559     }
 560     return res;
 561 }
 562
 563 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
 564  *
 565  * \param a factor1
 566  * \param b factor2
 567  * \param c term
 568  * \return a*b+c
 569  */
 570 static inline SimdDouble gmx_simdcall
 571 fma(SimdDouble a, SimdDouble b, SimdDouble c)
 572 {
 573     return a*b+c;
 574 }
 575
 576 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
 577  *
 578  * \param a factor1
 579  * \param b factor2
 580  * \param c term
 581  * \return a*b-c
 582  */
 583 static inline SimdDouble gmx_simdcall
 584 fms(SimdDouble a, SimdDouble b, SimdDouble c)
 585 {
 586     return a*b-c;
 587 }
 588
 589 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
 590  *
 591  * \param a factor1
 592  * \param b factor2
 593  * \param c term
 594  * \return -a*b+c
 595  */
 596 static inline SimdDouble gmx_simdcall
 597 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 598 {
 599     return c-a*b;
 600 }
 601
 602 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
 603  *
 604  * \param a factor1
 605  * \param b factor2
 606  * \param c term
 607  * \return -a*b-c
 608  */
 609 static inline SimdDouble gmx_simdcall
 610 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 611 {
 612     return -a*b-c;
 613 }
 614
 615 /*! \brief double SIMD 1.0/sqrt(x) lookup.
 616  *
 617  * This is a low-level instruction that should only be called from routines
 618  * implementing the inverse square root in simd_math.h.
 619  *
 620  * \param x Argument, x>0
 621  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 622  */
 623 static inline SimdDouble gmx_simdcall
 624 rsqrt(SimdDouble x)
 625 {
 626     SimdDouble         res;
 627
 628     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 629     {
 630         // sic - we only use single precision for the lookup
 631         res.simdInternal_[i] = 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 632     }
 633     return res;
 634 };
 635
 636 /*! \brief SIMD double 1.0/x lookup.
 637  *
 638  * This is a low-level instruction that should only be called from routines
 639  * implementing the reciprocal in simd_math.h.
 640  *
 641  * \param x Argument, x!=0
 642  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 643  */
 644 static inline SimdDouble gmx_simdcall
 645 rcp(SimdDouble x)
 646 {
 647     SimdDouble         res;
 648
 649     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 650     {
 651         // sic - we only use single precision for the lookup
 652         res.simdInternal_[i] = 1.0f / static_cast<float>(x.simdInternal_[i]);
 653     }
 654     return res;
 655 };
 656
 657 /*! \brief Add two double SIMD variables, masked version.
 658  *
 659  * \param a term1
 660  * \param b term2
 661  * \param m mask
 662  * \return a+b where mask is true, 0.0 otherwise.
 663  */
 664 static inline SimdDouble gmx_simdcall
 665 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 666 {
 667     SimdDouble         res;
 668
 669     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 670     {
 671         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
 672     }
 673     return res;
 674 }
 675
 676 /*! \brief Multiply two double SIMD variables, masked version.
 677  *
 678  * \param a factor1
 679  * \param b factor2
 680  * \param m mask
 681  * \return a*b where mask is true, 0.0 otherwise.
 682  */
 683 static inline SimdDouble gmx_simdcall
 684 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 685 {
 686     SimdDouble         res;
 687
 688     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 689     {
 690         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
 691     }
 692     return res;
 693 }
 694
 695 /*! \brief SIMD double fused multiply-add, masked version.
 696  *
 697  * \param a factor1
 698  * \param b factor2
 699  * \param c term
 700  * \param m mask
 701  * \return a*b+c where mask is true, 0.0 otherwise.
 702  */
 703 static inline SimdDouble gmx_simdcall
 704 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 705 {
 706     SimdDouble         res;
 707
 708     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 709     {
 710         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
 711     }
 712     return res;
 713 }
 714
 715 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
 716  *
 717  * This is a low-level instruction that should only be called from routines
 718  * implementing the inverse square root in simd_math.h.
 719  *
 720  * \param x Argument, x>0 for entries where mask is true.
 721  * \param m Mask
 722  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 723  *         The result for masked-out entries will be 0.0.
 724  */
 725 static inline SimdDouble gmx_simdcall
 726 maskzRsqrt(SimdDouble x, SimdDBool m)
 727 {
 728     SimdDouble         res;
 729
 730     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 731     {
 732         // sic - we only use single precision for the lookup
 733         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
 734     }
 735     return res;
 736 }
 737
 738 /*! \brief SIMD double 1.0/x lookup, masked version.
 739  *
 740  * This is a low-level instruction that should only be called from routines
 741  * implementing the reciprocal in simd_math.h.
 742  *
 743  * \param x Argument, x>0 for entries where mask is true.
 744  * \param m Mask
 745  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 746  *         The result for masked-out entries will be 0.0.
 747  */
 748 static inline SimdDouble gmx_simdcall
 749 maskzRcp(SimdDouble x, SimdDBool m)
 750 {
 751     SimdDouble         res;
 752
 753     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 754     {
 755         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / static_cast<float>(x.simdInternal_[i]) : 0.0;
 756     }
 757     return res;
 758 }
 759
 760 /*! \brief SIMD double floating-point fabs().
 761  *
 762  * \param a any floating point values
 763  * \return fabs(a) for each element.
 764  */
 765 static inline SimdDouble gmx_simdcall
 766 abs(SimdDouble a)
 767 {
 768     SimdDouble         res;
 769
 770     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 771     {
 772         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 773     }
 774     return res;
 775 }
 776
 777 /*! \brief Set each SIMD double element to the largest from two variables.
 778  *
 779  * \param a Any floating-point value
 780  * \param b Any floating-point value
 781  * \return max(a,b) for each element.
 782  */
 783 static inline SimdDouble gmx_simdcall
 784 max(SimdDouble a, SimdDouble b)
 785 {
 786     SimdDouble         res;
 787
 788     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 789     {
 790         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 791     }
 792     return res;
 793 }
 794
 795 /*! \brief Set each SIMD double element to the smallest from two variables.
 796  *
 797  * \param a Any floating-point value
 798  * \param b Any floating-point value
 799  * \return min(a,b) for each element.
 800  */
 801 static inline SimdDouble gmx_simdcall
 802 min(SimdDouble a, SimdDouble b)
 803 {
 804     SimdDouble         res;
 805
 806     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 807     {
 808         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 809     }
 810     return res;
 811 }
 812
 813 /*! \brief SIMD double round to nearest integer value (in floating-point format).
 814  *
 815  * \param a Any floating-point value
 816  * \return The nearest integer, represented in floating-point format.
 817  */
 818 static inline SimdDouble gmx_simdcall
 819 round(SimdDouble a)
 820 {
 821     SimdDouble         res;
 822
 823     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 824     {
 825         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 826     }
 827     return res;
 828 }
 829
 830 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
 831  *
 832  * \param a Any floating-point value
 833  * \return Integer rounded towards zero, represented in floating-point format.
 834  *
 835  * \note This is truncation towards zero, not floor(). The reason for this
 836  * is that truncation is virtually always present as a dedicated hardware
 837  * instruction, but floor() frequently isn't.
 838  */
 839 static inline SimdDouble gmx_simdcall
 840 trunc(SimdDouble a)
 841 {
 842     SimdDouble         res;
 843
 844     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 845     {
 846         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 847     }
 848     return res;
 849 }
 850
 851 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
 852  *
 853  * \param       value     Floating-point value to extract from
 854  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 855  * \return      Fraction of value, floating-point SIMD format.
 856  */
 857 static inline SimdDouble gmx_simdcall
 858 frexp(SimdDouble value, SimdDInt32 * exponent)
 859 {
 860     SimdDouble fraction;
 861
 862     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 863     {
 864         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 865     }
 866     return fraction;
 867 }
 868
 869 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
 870  *
 871  * \tparam opt By default, this routine will return zero for input arguments
 872  *             that are so small they cannot be reproduced in the current
 873  *             precision. If the unsafe math optimization template parameter
 874  *             setting is used, these tests are skipped, and the result will
 875  *             be undefined (possible even NaN). This might happen below -127
 876  *             in single precision or -1023 in double, although some
 877  *             might use denormal support to extend the range.
 878  *
 879  * \param value Floating-point number to multiply with new exponent
 880  * \param exponent Integer that will not overflow as 2^exponent.
 881  * \return value*2^exponent
 882  */
 883 template <MathOptimization opt = MathOptimization::Safe>
 884 static inline SimdDouble gmx_simdcall
 885 ldexp(SimdDouble value, SimdDInt32 exponent)
 886 {
 887     SimdDouble           res;
 888
 889     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 890     {
 891         // std::ldexp already takes care of clamping arguments, so we do not
 892         // need to do anything in the reference implementation
 893         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 894     }
 895     return res;
 896 }
 897
 898 /*! \brief Return sum of all elements in SIMD double variable.
 899  *
 900  * \param a SIMD variable to reduce/sum.
 901  * \return The sum of all elements in the argument variable.
 902  *
 903  */
 904 static inline double gmx_simdcall
 905 reduce(SimdDouble a)
 906 {
 907     double sum = 0.0;
 908
 909     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 910     {
 911         sum += a.simdInternal_[i];
 912     }
 913     return sum;
 914 }
 915
 916 /*! \}
 917  *
 918  * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 919  * \{
 920  */
 921
 922 /*! \brief SIMD a==b for double SIMD.
 923  *
 924  * \param a value1
 925  * \param b value2
 926  * \return Each element of the boolean will be set to true if a==b.
 927  *
 928  * Beware that exact floating-point comparisons are difficult.
 929  */
 930 static inline SimdDBool gmx_simdcall
 931 operator==(SimdDouble a, SimdDouble b)
 932 {
 933     SimdDBool         res;
 934
 935     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 936     {
 937         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 938     }
 939     return res;
 940 }
 941
 942 /*! \brief SIMD a!=b for double SIMD.
 943  *
 944  * \param a value1
 945  * \param b value2
 946  * \return Each element of the boolean will be set to true if a!=b.
 947  *
 948  * Beware that exact floating-point comparisons are difficult.
 949  */
 950 static inline SimdDBool gmx_simdcall
 951 operator!=(SimdDouble a, SimdDouble b)
 952 {
 953     SimdDBool         res;
 954
 955     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 956     {
 957         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 958     }
 959     return res;
 960 }
 961
 962 /*! \brief SIMD a<b for double SIMD.
 963  *
 964  * \param a value1
 965  * \param b value2
 966  * \return Each element of the boolean will be set to true if a<b.
 967  */
 968 static inline SimdDBool gmx_simdcall
 969 operator<(SimdDouble a, SimdDouble b)
 970 {
 971     SimdDBool          res;
 972
 973     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 974     {
 975         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 976     }
 977     return res;
 978 }
 979
 980 /*! \brief SIMD a<=b for double SIMD.
 981  *
 982  * \param a value1
 983  * \param b value2
 984  * \return Each element of the boolean will be set to true if a<=b.
 985  */
 986 static inline SimdDBool gmx_simdcall
 987 operator<=(SimdDouble a, SimdDouble b)
 988 {
 989     SimdDBool          res;
 990
 991     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 992     {
 993         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 994     }
 995     return res;
 996 }
 997
 998 /*! \brief Return true if any bits are set in the single precision SIMD.
 999  *
1000  * This function is used to handle bitmasks, mainly for exclusions in the
1001  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
1002  * so it is not identical to not-equal.
1003  *
1004  * \param a value
1005  * \return Each element of the boolean will be true if any bit in a is nonzero.
1006  */
1007 static inline SimdDBool gmx_simdcall
1008 testBits(SimdDouble a)
1009 {
1010     SimdDBool         res;
1011
1012     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1013     {
1014         union
1015         {
1016             std::uint64_t i;
1017             double        d;
1018         } conv;
1019
1020         conv.d               = a.simdInternal_[i];
1021         res.simdInternal_[i] = (conv.i != 0);
1022     }
1023     return res;
1024 }
1025
1026 /*! \brief Logical \a and on double precision SIMD booleans.
1027  *
1028  * \param a logical vars 1
1029  * \param b logical vars 2
1030  * \return For each element, the result boolean is true if a \& b are true.
1031  *
1032  * \note This is not necessarily a bitwise operation - the storage format
1033  * of booleans is implementation-dependent.
1034  */
1035 static inline SimdDBool gmx_simdcall
1036 operator&&(SimdDBool a, SimdDBool b)
1037 {
1038     SimdDBool         res;
1039
1040     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1041     {
1042         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1043     }
1044     return res;
1045 }
1046
1047 /*! \brief Logical \a or on double precision SIMD booleans.
1048  *
1049  * \param a logical vars 1
1050  * \param b logical vars 2
1051  * \return For each element, the result boolean is true if a or b is true.
1052  *
1053  * Note that this is not necessarily a bitwise operation - the storage format
1054  * of booleans is implementation-dependent.
1055  *
1056  \ */
1057 static inline SimdDBool gmx_simdcall
1058 operator||(SimdDBool a, SimdDBool b)
1059 {
1060     SimdDBool         res;
1061
1062     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1063     {
1064         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1065     }
1066     return res;
1067 }
1068
1069 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1070  *
1071  * \param a Logical variable.
1072  * \return true if any element in a is true, otherwise false.
1073  *
1074  * The actual return value for truth will depend on the architecture,
1075  * so any non-zero value is considered truth.
1076  */
1077 static inline bool gmx_simdcall
1078 anyTrue(SimdDBool a)
1079 {
1080     bool res = false;
1081
1082     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1083     {
1084         res = res || a.simdInternal_[i];
1085     }
1086     return res;
1087 }
1088
1089 /*! \brief Select from double precision SIMD variable where boolean is true.
1090  *
1091  * \param a Floating-point variable to select from
1092  * \param mask Boolean selector
1093  * \return  For each element, a is selected for true, 0 for false.
1094  */
1095 static inline SimdDouble gmx_simdcall
1096 selectByMask(SimdDouble a, SimdDBool mask)
1097 {
1098     SimdDouble          res;
1099
1100     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1101     {
1102         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1103     }
1104     return res;
1105 }
1106
1107 /*! \brief Select from double precision SIMD variable where boolean is false.
1108  *
1109  * \param a Floating-point variable to select from
1110  * \param mask Boolean selector
1111  * \return  For each element, a is selected for false, 0 for true (sic).
1112  */
1113 static inline SimdDouble gmx_simdcall
1114 selectByNotMask(SimdDouble a, SimdDBool mask)
1115 {
1116     SimdDouble          res;
1117
1118     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1119     {
1120         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1121     }
1122     return res;
1123 }
1124
1125 /*! \brief Vector-blend SIMD double selection.
1126  *
1127  * \param a First source
1128  * \param b Second source
1129  * \param sel Boolean selector
1130  * \return For each element, select b if sel is true, a otherwise.
1131  */
1132 static inline SimdDouble gmx_simdcall
1133 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1134 {
1135     SimdDouble         res;
1136
1137     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1138     {
1139         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1140     }
1141     return res;
1142 }
1143
1144 /*! \}
1145  *
1146  * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1147  * \{
1148  */
1149
1150 /*! \brief SIMD integer shift left logical, based on immediate value.
1151  *
1152  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1153  *
1154  *  Logical shift. Each element is shifted (independently) up to 32 positions
1155  *  left, while zeros are shifted in from the right.
1156  *
1157  * \param a integer data to shift
1158  * \param n number of positions to shift left. n<=32.
1159  * \return shifted values
1160  */
1161 static inline SimdDInt32 gmx_simdcall
1162 operator<<(SimdDInt32 a, int n)
1163 {
1164     SimdDInt32         res;
1165
1166     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1167     {
1168         res.simdInternal_[i] = a.simdInternal_[i] << n;
1169     }
1170     return res;
1171 }
1172
1173 /*! \brief SIMD integer shift right logical, based on immediate value.
1174  *
1175  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1176  *
1177  *  Logical shift. Each element is shifted (independently) up to 32 positions
1178  *  right, while zeros are shifted in from the left.
1179  *
1180  * \param a integer data to shift
1181  * \param n number of positions to shift right. n<=32.
1182  * \return shifted values
1183  */
1184 static inline SimdDInt32 gmx_simdcall
1185 operator>>(SimdDInt32 a, int n)
1186 {
1187     SimdDInt32         res;
1188
1189     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1190     {
1191         res.simdInternal_[i] = a.simdInternal_[i] >> n;
1192     }
1193     return res;
1194 }
1195
1196 /*! \brief Integer SIMD bitwise and.
1197  *
1198  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1199  *
1200  * \note You can \a not use this operation directly to select based on a boolean
1201  * SIMD variable, since booleans are separate from integer SIMD. If that
1202  * is what you need, have a look at \ref gmx::selectByMask instead.
1203  *
1204  * \param a first integer SIMD
1205  * \param b second integer SIMD
1206  * \return a \& b (bitwise and)
1207  */
1208 static inline SimdDInt32 gmx_simdcall
1209 operator&(SimdDInt32 a, SimdDInt32 b)
1210 {
1211     SimdDInt32         res;
1212
1213     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1214     {
1215         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1216     }
1217     return res;
1218 }
1219
1220 /*! \brief Integer SIMD bitwise not/complement.
1221  *
1222  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1223  *
1224  * \note You can \a not use this operation directly to select based on a boolean
1225  * SIMD variable, since booleans are separate from integer SIMD. If that
1226  * is what you need, have a look at \ref gmx::selectByMask instead.
1227  *
1228  * \param a integer SIMD
1229  * \param b integer SIMD
1230  * \return (~a) & b
1231  */
1232 static inline SimdDInt32 gmx_simdcall
1233 andNot(SimdDInt32 a, SimdDInt32 b)
1234 {
1235     SimdDInt32         res;
1236
1237     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1238     {
1239         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1240     }
1241     return res;
1242 }
1243
1244 /*! \brief Integer SIMD bitwise or.
1245  *
1246  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1247  *
1248  * \param a first integer SIMD
1249  * \param b second integer SIMD
1250  * \return a \| b (bitwise or)
1251  */
1252 static inline SimdDInt32 gmx_simdcall
1253 operator|(SimdDInt32 a, SimdDInt32 b)
1254 {
1255     SimdDInt32         res;
1256
1257     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1258     {
1259         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1260     }
1261     return res;
1262 }
1263
1264 /*! \brief Integer SIMD bitwise xor.
1265  *
1266  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1267  *
1268  * \param a first integer SIMD
1269  * \param b second integer SIMD
1270  * \return a ^ b (bitwise xor)
1271  */
1272 static inline SimdDInt32 gmx_simdcall
1273 operator^(SimdDInt32 a, SimdDInt32 b)
1274 {
1275     SimdDInt32         res;
1276
1277     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1278     {
1279         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1280     }
1281     return res;
1282 }
1283
1284 /*! \}
1285  *
1286  * \name SIMD implementation integer (corresponding to double) arithmetics
1287  * \{
1288  */
1289
1290 /*! \brief Add SIMD integers.
1291  *
1292  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1293  *
1294  * \param a term1
1295  * \param b term2
1296  * \return a+b
1297  */
1298 static inline SimdDInt32 gmx_simdcall
1299 operator+(SimdDInt32 a, SimdDInt32 b)
1300 {
1301     SimdDInt32         res;
1302
1303     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1304     {
1305         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1306     }
1307     return res;
1308 }
1309
1310 /*! \brief Subtract SIMD integers.
1311  *
1312  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1313  *
1314  * \param a term1
1315  * \param b term2
1316  * \return a-b
1317  */
1318 static inline SimdDInt32 gmx_simdcall
1319 operator-(SimdDInt32 a, SimdDInt32 b)
1320 {
1321     SimdDInt32         res;
1322
1323     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1324     {
1325         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1326     }
1327     return res;
1328 }
1329
1330 /*! \brief Multiply SIMD integers.
1331  *
1332  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1333  *
1334  * \param a factor1
1335  * \param b factor2
1336  * \return a*b.
1337  *
1338  * \note Only the low 32 bits are retained, so this can overflow.
1339  */
1340 static inline SimdDInt32 gmx_simdcall
1341 operator*(SimdDInt32 a, SimdDInt32 b)
1342 {
1343     SimdDInt32         res;
1344
1345     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1346     {
1347         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1348     }
1349     return res;
1350 }
1351
1352 /*! \}
1353  *
1354  * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1355  * \{
1356  */
1357
1358 /*! \brief Equality comparison of two integers corresponding to double values.
1359  *
1360  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1361  *
1362  * \param a SIMD integer1
1363  * \param b SIMD integer2
1364  * \return SIMD integer boolean with true for elements where a==b
1365  */
1366 static inline SimdDIBool gmx_simdcall
1367 operator==(SimdDInt32 a, SimdDInt32 b)
1368 {
1369     SimdDIBool         res;
1370
1371     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1372     {
1373         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1374     }
1375     return res;
1376 }
1377
1378 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1379  *
1380  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1381  *
1382  * \param a SIMD integer1
1383  * \param b SIMD integer2
1384  * \return SIMD integer boolean with true for elements where a<b
1385  */
1386 static inline SimdDIBool gmx_simdcall
1387 operator<(SimdDInt32 a, SimdDInt32 b)
1388 {
1389     SimdDIBool         res;
1390
1391     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1392     {
1393         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1394     }
1395     return res;
1396 }
1397
1398 /*! \brief Check if any bit is set in each element
1399  *
1400  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1401  *
1402  * \param a SIMD integer
1403  * \return SIMD integer boolean with true for elements where any bit is set
1404  */
1405 static inline SimdDIBool gmx_simdcall
1406 testBits(SimdDInt32 a)
1407 {
1408     SimdDIBool         res;
1409
1410     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1411     {
1412         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1413     }
1414     return res;
1415 }
1416
1417 /*! \brief Logical AND on SimdDIBool.
1418  *
1419  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1420  *
1421  * \param a SIMD boolean 1
1422  * \param b SIMD boolean 2
1423  * \return True for elements where both a and b are true.
1424  */
1425 static inline SimdDIBool gmx_simdcall
1426 operator&&(SimdDIBool a, SimdDIBool b)
1427 {
1428     SimdDIBool        res;
1429
1430     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1431     {
1432         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1433     }
1434     return res;
1435 }
1436
1437 /*! \brief Logical OR on SimdDIBool.
1438  *
1439  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1440  *
1441  * \param a SIMD boolean 1
1442  * \param b SIMD boolean 2
1443  * \return True for elements where both a and b are true.
1444  */
1445 static inline SimdDIBool gmx_simdcall
1446 operator||(SimdDIBool a, SimdDIBool b)
1447 {
1448     SimdDIBool         res;
1449
1450     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1451     {
1452         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1453     }
1454     return res;
1455 }
1456
1457 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1458  *
1459  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1460  *
1461  * The actual return value for "any true" will depend on the architecture.
1462  * Any non-zero value should be considered truth.
1463  *
1464  * \param a SIMD boolean
1465  * \return True if any of the elements in a is true, otherwise 0.
1466  */
1467 static inline bool gmx_simdcall
1468 anyTrue(SimdDIBool a)
1469 {
1470     bool res = false;
1471
1472     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1473     {
1474         res = res || a.simdInternal_[i];
1475     }
1476     return res;
1477 }
1478
1479 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1480  *
1481  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1482  *
1483  * \param a SIMD integer to select from
1484  * \param mask Boolean selector
1485  * \return Elements from a where sel is true, 0 otherwise.
1486  */
1487 static inline SimdDInt32 gmx_simdcall
1488 selectByMask(SimdDInt32 a, SimdDIBool mask)
1489 {
1490     SimdDInt32         res;
1491
1492     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1493     {
1494         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1495     }
1496     return res;
1497 }
1498
1499 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1500  *
1501  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1502  *
1503  * \param a SIMD integer to select from
1504  * \param mask Boolean selector
1505  * \return Elements from a where sel is false, 0 otherwise (sic).
1506  */
1507 static inline SimdDInt32 gmx_simdcall
1508 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1509 {
1510     SimdDInt32         res;
1511
1512     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1513     {
1514         res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1515     }
1516     return res;
1517 }
1518
1519 /*! \brief Vector-blend SIMD integer selection.
1520  *
1521  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1522  *
1523  * \param a First source
1524  * \param b Second source
1525  * \param sel Boolean selector
1526  * \return For each element, select b if sel is true, a otherwise.
1527  */
1528 static inline SimdDInt32 gmx_simdcall
1529 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1530 {
1531     SimdDInt32        res;
1532
1533     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1534     {
1535         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1536     }
1537     return res;
1538 }
1539
1540 /*! \}
1541  *
1542  * \name SIMD implementation conversion operations
1543  * \{
1544  */
1545
1546 /*! \brief Round double precision floating point to integer.
1547  *
1548  * \param a SIMD floating-point
1549  * \return SIMD integer, rounded to nearest integer.
1550  */
1551 static inline SimdDInt32 gmx_simdcall
1552 cvtR2I(SimdDouble a)
1553 {
1554     SimdDInt32         b;
1555
1556     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1557     {
1558         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1559     }
1560     return b;
1561 };
1562
1563 /*! \brief Truncate double precision floating point to integer.
1564  *
1565  * \param a SIMD floating-point
1566  * \return SIMD integer, truncated to nearest integer.
1567  */
1568 static inline SimdDInt32 gmx_simdcall
1569 cvttR2I(SimdDouble a)
1570 {
1571     SimdDInt32         b;
1572
1573     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1574     {
1575         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1576     }
1577     return b;
1578 };
1579
1580 /*! \brief Convert integer to double precision floating point.
1581  *
1582  * \param a SIMD integer
1583  * \return SIMD floating-point
1584  */
1585 static inline SimdDouble gmx_simdcall
1586 cvtI2R(SimdDInt32 a)
1587 {
1588     SimdDouble         b;
1589
1590     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1591     {
1592         b.simdInternal_[i] = a.simdInternal_[i];
1593     }
1594     return b;
1595 };
1596
1597 /*! \brief Convert from double precision boolean to corresponding integer boolean
1598  *
1599  * \param a SIMD floating-point boolean
1600  * \return SIMD integer boolean
1601  */
1602 static inline SimdDIBool gmx_simdcall
1603 cvtB2IB(SimdDBool a)
1604 {
1605     SimdDIBool         b;
1606
1607     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1608     {
1609         b.simdInternal_[i] = a.simdInternal_[i];
1610     }
1611     return b;
1612 };
1613
1614 /*! \brief Convert from integer boolean to corresponding double precision boolean
1615  *
1616  * \param a SIMD integer boolean
1617  * \return SIMD floating-point boolean
1618  */
1619 static inline SimdDBool gmx_simdcall
1620 cvtIB2B(SimdDIBool a)
1621 {
1622     SimdDBool         b;
1623
1624     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1625     {
1626         b.simdInternal_[i] = a.simdInternal_[i];
1627     }
1628     return b;
1629 };
1630
1631 /*! \brief Convert SIMD float to double.
1632  *
1633  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1634  * \ref GMX_SIMD_DOUBLE_WIDTH.
1635  *
1636  * Float/double conversions are complex since the SIMD width could either
1637  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1638  * need to check for the width in the code, and have different code paths.
1639  *
1640  * \param f Single-precision SIMD variable
1641  * \return Double-precision SIMD variable of the same width
1642  */
1643 static inline SimdDouble gmx_simdcall
1644 cvtF2D(SimdFloat gmx_unused f)
1645 {
1646 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1647     SimdDouble        d;
1648     for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1649     {
1650         d.simdInternal_[i] = f.simdInternal_[i];
1651     }
1652     return d;
1653 #else
1654     gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1655 #endif
1656 }
1657
1658 /*! \brief Convert SIMD double to float.
1659  *
1660  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1661  * \ref GMX_SIMD_DOUBLE_WIDTH.
1662  *
1663  * Float/double conversions are complex since the SIMD width could either
1664  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1665  * need to check for the width in the code, and have different code paths.
1666  *
1667  * \param d Double-precision SIMD variable
1668  * \return Single-precision SIMD variable of the same width
1669  */
1670 static inline SimdFloat gmx_simdcall
1671 cvtD2F(SimdDouble gmx_unused d)
1672 {
1673 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1674     SimdFloat        f;
1675     for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1676     {
1677         f.simdInternal_[i] = d.simdInternal_[i];
1678     }
1679     return f;
1680 #else
1681     gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1682 #endif
1683 }
1684
1685 /*! \brief Convert SIMD float to double.
1686  *
1687  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1688  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1689  *
1690  * Float/double conversions are complex since the SIMD width could either
1691  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1692  * need to check for the width in the code, and have different code paths.
1693  *
1694  * \param f Single-precision SIMD variable
1695  * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1696  * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1697  */
1698 static inline void gmx_simdcall
1699 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1700 {
1701 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1702     for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1703     {
1704         d0->simdInternal_[i] = f.simdInternal_[i];
1705         d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1706     }
1707 #else
1708     gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1709 #endif
1710 }
1711
1712 /*! \brief Convert SIMD double to float.
1713  *
1714  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1715  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1716  *
1717  * Float/double conversions are complex since the SIMD width could either
1718  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1719  * need to check for the width in the code, and have different code paths.
1720  *
1721  * \param d0 Double-precision SIMD variable, first half of values to put in f.
1722  * \param d1 Double-precision SIMD variable, second half of values to put in f.
1723  * \return Single-precision SIMD variable with all values.
1724  */
1725 static inline SimdFloat gmx_simdcall
1726 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1727 {
1728 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1729     SimdFloat        f;
1730     for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1731     {
1732         f.simdInternal_[i]                            = d0.simdInternal_[i];
1733         f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1734     }
1735     return f;
1736 #else
1737     gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1738 #endif
1739 }
1740
1741 /*! \} */
1742
1743 /*! \} */
1744 /*! \endcond */
1745
1746 }      // namespace gmx
1747
1748 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H