src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
   5  * Copyright (c) 2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 /*
  38  * armv8+sve support to GROMACS was contributed by the Research Organization for
  39  * Information Science and Technology (RIST).
  40  */
  41
  42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
  43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
  44
  45 #include "config.h"
  46
  47 #include <cassert>
  48 #include <cstddef>
  49 #include <cstdint>
  50
  51 #include <arm_sve.h>
  52
  53 #include "gromacs/math/utilities.h"
  54
  55 #include "impl_arm_sve_simd_float.h"
  56
// Governing predicate with every 64-bit lane active; used below for SimdDouble loads and stores.
#define SVE_DOUBLE_MASK svptrue_b64()
// The same all-true 64-bit predicate under a second name (presumably meant for
// SimdDInt32 operations; it is not referenced by the code visible in this header).
#define SVE_DINT32_MASK svptrue_b64()
  59
  60 namespace gmx
  61 {
  62
  63 class SimdDouble
  64 {
  65 public:
  66     SimdDouble() {}
  67
  68     SimdDouble(const double d) { this->simdInternal_ = svdup_f64(d); }
  69
  70     SimdDouble(svfloat64_t simd) : simdInternal_(simd) {}
  71
  72     float64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  73 };
  74
  75 class SimdDInt32
  76 {
  77 public:
  78     SimdDInt32() {}
  79
  80     SimdDInt32(const int32_t i) { this->simdInternal_ = svdup_s64(i); }
  81
  82     SimdDInt32(svint64_t simd) : simdInternal_(simd) {}
  83
  84     int64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  85 };
  86
  87 class SimdDBool
  88 {
  89 public:
  90     SimdDBool() {}
  91
  92     SimdDBool(const bool b)
  93     {
  94         this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
  95     }
  96
  97     SimdDBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
  98
  99     SimdDBool(svuint64_t simd) : simdInternal_(simd) {}
 100
 101     uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
 102 };
 103
 104 class SimdDIBool
 105 {
 106 public:
 107     SimdDIBool() {}
 108
 109     SimdDIBool(const bool b)
 110     {
 111         this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
 112     }
 113
 114     SimdDIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
 115
 116     SimdDIBool(svuint64_t simd) : simdInternal_(simd) {}
 117
 118     uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
 119 };
 120
 121 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 122 {
 123     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 124     svbool_t pg = SVE_DOUBLE_MASK;
 125     return { svld1_f64(pg, m) };
 126 }
 127
 128 static inline SimdDouble gmx_simdcall simdLoad(SimdDouble* m, int offset, SimdDoubleTag = {})
 129 {
 130     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 131     svbool_t pg = SVE_DOUBLE_MASK;
 132     return { svld1_f64(pg, reinterpret_cast<double*>(m) + offset * svcntd()) };
 133 }
 134
 135 static inline SimdDouble gmx_simdcall simdLoadDouble(const double* m)
 136 {
 137     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 138     svbool_t pg = SVE_DOUBLE_MASK;
 139     return { svld1_f64(pg, m) };
 140 }
 141
 142 static inline void gmx_simdcall store(double* m, SimdDouble a)
 143 {
 144     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 145     svbool_t pg = SVE_DOUBLE_MASK;
 146     svst1_f64(pg, m, a.simdInternal_);
 147 }
 148
 149 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 150 {
 151     svbool_t pg = SVE_DOUBLE_MASK;
 152     return { svld1_f64(pg, m) };
 153 }
 154
 155 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 156 {
 157     svbool_t pg = SVE_DOUBLE_MASK;
 158     svst1_f64(pg, m, a.simdInternal_);
 159 }
 160
 161 static inline SimdDouble gmx_simdcall setZeroD()
 162 {
 163     return { svdup_f64(0.0) };
 164 }
 165
 166 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 167 {
 168     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 169     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 170     return { svunpklo_s64(svld1_s32(pg, m)) };
 171 }
 172
 173 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
 174 {
 175     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 176     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 177     svst1_s32(pg, m,
 178               svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
 179 }
 180
 181 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 182 {
 183     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 184     return { svunpklo_s64(svld1_s32(pg, m)) };
 185 }
 186
 187 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 188 {
 189     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 190     svst1_s32(pg, m,
 191               svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
 192 }
 193
 194 static inline SimdDInt32 gmx_simdcall setZeroDI()
 195 {
 196     return { svdup_s64(0) };
 197 }
 198
 199 template<int index>
 200 gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
 201 {
 202     svbool_t pg = svwhilelt_b64(0, index);
 203     return svlasta_s64(pg, a.simdInternal_);
 204 }
 205
 206 template<int index>
 207 gmx_simdcall static inline double extract(SimdDouble a)
 208 {
 209     svbool_t pg = svwhilelt_b64(0, index);
 210     return svlasta_f64(pg, a.simdInternal_);
 211 }
 212
 213 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 214 {
 215     svbool_t pg = svptrue_b64();
 216     return { svreinterpret_f64_s64(svand_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
 217                                                svreinterpret_s64_f64(b.simdInternal_))) };
 218 }
 219
 220 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 221 {
 222     svbool_t pg = svptrue_b64();
 223     return { svreinterpret_f64_s64(svbic_s64_x(pg, svreinterpret_s64_f64(b.simdInternal_),
 224                                                svreinterpret_s64_f64(a.simdInternal_))) };
 225 }
 226
 227 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 228 {
 229     svbool_t pg = svptrue_b64();
 230     return { svreinterpret_f64_s64(svorr_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
 231                                                svreinterpret_s64_f64(b.simdInternal_))) };
 232 }
 233
 234 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 235 {
 236     svbool_t pg = svptrue_b64();
 237     return { svreinterpret_f64_s64(sveor_s64_x(pg, svreinterpret_s64_f64(a.simdInternal_),
 238                                                svreinterpret_s64_f64(b.simdInternal_))) };
 239 }
 240
 241 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 242 {
 243     svbool_t pg = svptrue_b64();
 244     return { svadd_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 245 }
 246
 247 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 248 {
 249     svbool_t pg = svptrue_b64();
 250     return { svsub_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 251 }
 252
 253 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
 254 {
 255     svbool_t pg = svptrue_b64();
 256     return { svneg_f64_x(pg, a.simdInternal_) };
 257 }
 258
 259 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 260 {
 261     svbool_t pg = svptrue_b64();
 262     return { svmul_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 263 }
 264
 265 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 266 {
 267     svbool_t pg = svptrue_b64();
 268     return { svmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 269 }
 270
 271 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 272 {
 273     svbool_t pg = svptrue_b64();
 274     return { svnmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 275 }
 276
 277 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 278 {
 279     svbool_t pg = svptrue_b64();
 280     return { svmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 281 }
 282
 283 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 284 {
 285     svbool_t pg = svptrue_b64();
 286     return { svnmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 287 }
 288
 289 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 290 {
 291     return { svrsqrte_f64(x.simdInternal_) };
 292 }
 293
 294 // The SIMD implementation seems to overflow when we square lu for
 295 // values close to FLOAT_MAX, so we fall back on the version in
 296 // simd_math.h, which is probably slightly slower.
 297 #if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
 298 static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
 299 {
 300     return { vmulq_f64(lu.simdInternal_,
 301                        vrsqrtsq_f32(vmulq_f32(lu.simdInternal_, lu.simdInternal_), x.simdInternal_)) };
 302 }
 303 #endif
 304
 305 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 306 {
 307     return { svrecpe_f64(x.simdInternal_) };
 308 }
 309
 310 static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
 311 {
 312     svbool_t pg = svptrue_b64();
 313     return { svmul_f64_x(pg, lu.simdInternal_, svrecps_f64(lu.simdInternal_, x.simdInternal_)) };
 314 }
 315
 316 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 317 {
 318     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 319     return { svadd_f64_m(pg, a.simdInternal_, b.simdInternal_) };
 320 }
 321
 322 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 323 {
 324     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 325     return { svmul_f64_z(pg, a.simdInternal_, b.simdInternal_) };
 326 }
 327
 328 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 329 {
 330     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 331     return { svmad_f64_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 332 }
 333
 334 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 335 {
 336     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 337     // The result will always be correct since we mask the result with m, but
 338     // for debug builds we also want to make sure not to generate FP exceptions
 339 #ifndef NDEBUG
 340     x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
 341 #endif
 342     return { svreinterpret_f64_u64(svand_n_u64_z(
 343             pg, svreinterpret_u64_f64(svrsqrte_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
 344 }
 345
 346 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 347 {
 348     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 349     // The result will always be correct since we mask the result with m, but
 350     // for debug builds we also want to make sure not to generate FP exceptions
 351 #ifndef NDEBUG
 352     x.simdInternal_ = svsel_f64(m, x.simdInternal_, svdup_n_f64(1.0));
 353 #endif
 354     return { svreinterpret_f64_u64(svand_n_u64_z(
 355             pg, svreinterpret_u64_f64(svrecpe_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
 356 }
 357
 358 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
 359 {
 360     svbool_t pg = svptrue_b64();
 361     return { svabs_f64_x(pg, x.simdInternal_) };
 362 }
 363
 364 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 365 {
 366     svbool_t pg = svptrue_b64();
 367     return { svmax_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 368 }
 369
 370 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 371 {
 372     svbool_t pg = svptrue_b64();
 373     return { svmin_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 374 }
 375
 376 // Round and trunc operations are defined at the end of this file, since they
 377 // need to use double-to-integer and integer-to-double conversions.
 378
 379 template<MathOptimization opt = MathOptimization::Safe>
 380 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
 381 {
 382     svbool_t        pg           = svptrue_b64();
 383     const svint64_t exponentMask = svdup_n_s64(0x7FF0000000000000LL);
 384     const svint64_t mantissaMask = svdup_n_s64(0x800FFFFFFFFFFFFFLL);
 385     const svint64_t exponentBias = svdup_n_s64(1022LL); // add 1 to make our definition identical to frexp()
 386     const svfloat64_t half = svdup_n_f64(0.5);
 387     svint64_t         iExponent;
 388
 389     iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
 390     // iExponent               = svsub_s64_x(pg, svlsr_n_s64_x(pg, iExponent, 52), exponentBias);
 391     iExponent = svsub_s64_x(
 392             pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)), exponentBias);
 393
 394     exponent->simdInternal_ = iExponent;
 395
 396     return { svreinterpret_f64_s64(svorr_s64_x(
 397             pg, svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
 398             svreinterpret_s64_f64(half))) };
 399 }
 400
 401 template<MathOptimization opt = MathOptimization::Safe>
 402 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
 403 {
 404     svbool_t        pg           = svptrue_b64();
 405     const svint64_t exponentBias = svdup_n_s64(1023);
 406     svint64_t       iExponent    = svadd_s64_x(pg, exponent.simdInternal_, exponentBias);
 407
 408     if (opt == MathOptimization::Safe)
 409     {
 410         // Make sure biased argument is not negative
 411         iExponent = svmax_n_s64_x(pg, iExponent, 0);
 412     }
 413
 414     iExponent = svlsl_n_s64_x(pg, iExponent, 52);
 415
 416     return { svmul_f64_x(pg, value.simdInternal_, svreinterpret_f64_s64(iExponent)) };
 417 }
 418
 419 static inline double gmx_simdcall reduce(SimdDouble a)
 420 {
 421     svbool_t pg = svptrue_b64();
 422     return svadda_f64(pg, 0.0f, a.simdInternal_);
 423 }
 424
 425 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 426 {
 427     svbool_t pg = svptrue_b64();
 428     return { svcmpeq_f64(pg, a.simdInternal_, b.simdInternal_) };
 429 }
 430
 431 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 432 {
 433     svbool_t pg = svptrue_b64();
 434     return { svcmpne_f64(pg, a.simdInternal_, b.simdInternal_) };
 435 }
 436
 437 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 438 {
 439     svbool_t pg = svptrue_b64();
 440     return { svcmplt_f64(pg, a.simdInternal_, b.simdInternal_) };
 441 }
 442
 443 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 444 {
 445     svbool_t pg = svptrue_b64();
 446     return { svcmple_f64(pg, a.simdInternal_, b.simdInternal_) };
 447 }
 448
 449 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 450 {
 451     svbool_t pg = svptrue_b64();
 452     return { svcmpne_n_s64(pg, svreinterpret_s64_f64(a.simdInternal_), 0) };
 453 }
 454
 455 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
 456 {
 457     svbool_t pg = svptrue_b64();
 458     return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 459 }
 460
 461 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
 462 {
 463     svbool_t pg = svptrue_b64();
 464     return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 465 }
 466
 467 static inline bool gmx_simdcall anyTrue(SimdDBool a)
 468 {
 469     svbool_t pg = svptrue_b64();
 470     return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 471 }
 472
 473 static inline bool gmx_simdcall extractFirst(SimdDBool a)
 474 {
 475     svbool_t pg = svptrue_b64();
 476     return svptest_first(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 477 }
 478
 479 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
 480 {
 481     svbool_t pg = svptrue_b64();
 482     return { svreinterpret_f64_u64(svand_u64_x(pg, svreinterpret_u64_f64(a.simdInternal_), m.simdInternal_)) };
 483 }
 484
 485 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
 486 {
 487     svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
 488     return { svsel_f64(pg, a.simdInternal_, svdup_f64(0.0f)) };
 489 }
 490
 491 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 492 {
 493     svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
 494     return { svsel_f64(pg, b.simdInternal_, a.simdInternal_) };
 495 }
 496
 497 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
 498 {
 499     svbool_t pg = svptrue_b64();
 500     return { svand_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 501 }
 502
 503 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
 504 {
 505     svbool_t pg = svptrue_b64();
 506     return { svbic_s64_x(pg, b.simdInternal_, a.simdInternal_) };
 507 }
 508
 509 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
 510 {
 511     svbool_t pg = svptrue_b64();
 512     return { svorr_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 513 }
 514
 515 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
 516 {
 517     svbool_t pg = svptrue_b64();
 518     return { sveor_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 519 }
 520
 521 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
 522 {
 523     svbool_t pg = svptrue_b64();
 524     return { svadd_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 525 }
 526
 527 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
 528 {
 529     svbool_t pg = svptrue_b64();
 530     return { svsub_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 531 }
 532
 533 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
 534 {
 535     svbool_t pg = svptrue_b64();
 536     return { svmul_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 537 }
 538
 539 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
 540 {
 541     svbool_t pg = svptrue_b64();
 542     return { svcmpeq_s64(pg, a.simdInternal_, b.simdInternal_) };
 543 }
 544
 545 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
 546 {
 547     svbool_t pg = svptrue_b64();
 548     return { svcmpne_n_s64(pg, a.simdInternal_, (int64_t)0) };
 549 }
 550
 551 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
 552 {
 553     svbool_t pg = svptrue_b64();
 554     return { svcmplt_s64(pg, a.simdInternal_, b.simdInternal_) };
 555 }
 556
 557 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
 558 {
 559     svbool_t pg = svptrue_b64();
 560     return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 561 }
 562
 563 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
 564 {
 565     svbool_t pg = svptrue_b64();
 566     return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 567 }
 568
 569 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
 570 {
 571     svbool_t pg = svptrue_b64();
 572     return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 573 }
 574
 575 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
 576 {
 577     svbool_t pg = svptrue_b64();
 578     return { svand_s64_x(pg, a.simdInternal_, svreinterpret_s64_u64(m.simdInternal_)) };
 579 }
 580
 581 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
 582 {
 583     svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
 584     return { svadd_n_s64_z(pg, a.simdInternal_, 0) };
 585 }
 586
 587 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
 588 {
 589     svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
 590     return { svsel_s64(pg, b.simdInternal_, a.simdInternal_) };
 591 }
 592
 593 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
 594 {
 595     svbool_t pg = svptrue_b64();
 596     return { svcvt_s64_x(pg, svrinta_f64_x(pg, a.simdInternal_)) };
 597 }
 598
 599 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
 600 {
 601     // FIXME ???
 602     svbool_t pg = svptrue_b64();
 603     return { svcvt_s64_x(pg, a.simdInternal_) };
 604 }
 605
 606 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
 607 {
 608     svbool_t pg = svptrue_b64();
 609     return { svcvt_f64_x(pg, a.simdInternal_) };
 610 }
 611
 612 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
 613 {
 614     return { a.simdInternal_ };
 615 }
 616
 617 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
 618 {
 619     return { a.simdInternal_ };
 620 }
 621
 622 static inline SimdDouble gmx_simdcall round(SimdDouble x)
 623 {
 624     svbool_t pg = svptrue_b64();
 625     return { svrinta_f64_x(pg, x.simdInternal_) };
 626 }
 627
 628 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
 629 {
 630     return cvtI2R(cvttR2I(x));
 631 }
 632
 633 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
 634                                         SimdDouble gmx_unused* d0,
 635                                         SimdDouble gmx_unused* d1)
 636 {
 637     assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
 638     svbool_t pg       = svptrue_b32();
 639     d0->simdInternal_ = svcvt_f64_f32_x(pg, svzip1(f.simdInternal_, f.simdInternal_));
 640     d1->simdInternal_ = svcvt_f64_f32_x(pg, svzip2(f.simdInternal_, f.simdInternal_));
 641 }
 642
 643 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
 644 {
 645     svbool_t pg = svptrue_b64();
 646     assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
 647     return { svuzp1_f32(svcvt_f32_f64_x(pg, d0.simdInternal_), svcvt_f32_f64_x(pg, d1.simdInternal_)) };
 648 }
 649
 650 } // namespace gmx
 651
 652 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H