src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
   5  * Copyright (c) 2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 /*
  38  * armv8+sve support to GROMACS was contributed by the Research Organization for
  39  * Information Science and Technology (RIST).
  40  */
  41
  42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H
  43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H
  44
  45 #include "config.h"
  46
  47 #include <cassert>
  48 #include <cstddef>
  49 #include <cstdint>
  50
  51 #include <arm_sve.h>
  52
  53 #include "gromacs/math/utilities.h"
  54
  55 namespace gmx
  56 {
  57
  58 class SimdFloat
  59 {
  60 public:
  61     SimdFloat() {}
  62
  63     SimdFloat(const float f) { this->simdInternal_ = svdup_f32(f); }
  64
  65     SimdFloat(svfloat32_t simd) : simdInternal_(simd) {}
  66
  67     float32_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  68 };
  69
  70 class SimdFInt32
  71 {
  72 public:
  73     SimdFInt32() {}
  74
  75     SimdFInt32(const int32_t i) { this->simdInternal_ = svdup_s32(i); }
  76
  77     SimdFInt32(svint32_t simd) : simdInternal_(simd) {}
  78
  79     int32_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  80 };
  81
  82 class SimdFBool
  83 {
  84 public:
  85     SimdFBool() {}
  86
  87     SimdFBool(const bool b)
  88     {
  89         this->simdInternal_ = svdup_n_u32_x(svptrue_b32(), b ? 0xFFFFFFFF : 0);
  90     }
  91
  92     SimdFBool(svbool_t simd) { this->simdInternal_ = svdup_n_u32_z(simd, 0xFFFFFFFF); }
  93
  94     SimdFBool(svuint32_t simd) : simdInternal_(simd) {}
  95
  96     uint32_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  97 };
  98
  99 class SimdFIBool
 100 {
 101 public:
 102     SimdFIBool() {}
 103
 104     SimdFIBool(const bool b)
 105     {
 106         this->simdInternal_ = svdup_n_u32_x(svptrue_b32(), b ? 0xFFFFFFFF : 0);
 107     }
 108
 109     SimdFIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u32_z(simd, 0xFFFFFFFF); }
 110
 111     SimdFIBool(svuint32_t simd) : simdInternal_(simd) {}
 112
 113     uint32_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
 114 };
 115
 116 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 117 {
 118     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 119     svbool_t pg = svptrue_b32();
 120     return { svld1_f32(pg, m) };
 121 }
 122
 123 static inline SimdFloat gmx_simdcall simdLoad(SimdFloat* m, int offset, SimdFloatTag = {})
 124 {
 125     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 126     svbool_t pg = svptrue_b32();
 127     return { svld1_f32(pg, reinterpret_cast<float*>(m) + offset * svcntw()) };
 128 }
 129
 130 static inline SimdFloat gmx_simdcall simdLoadFloat(const float* m)
 131 {
 132     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 133     svbool_t pg = svptrue_b32();
 134     return { svld1_f32(pg, m) };
 135 }
 136
 137 static inline void gmx_simdcall store(float* m, SimdFloat a)
 138 {
 139     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 140     svbool_t pg = svptrue_b32();
 141     svst1_f32(pg, m, a.simdInternal_);
 142 }
 143
 144 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
 145 {
 146     svbool_t pg = svptrue_b32();
 147     return { svld1_f32(pg, m) };
 148 }
 149
 150 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
 151 {
 152     svbool_t pg = svptrue_b32();
 153     svst1_f32(pg, m, a.simdInternal_);
 154 }
 155
 156 static inline SimdFloat gmx_simdcall setZeroF()
 157 {
 158     return { svdup_f32(0.0f) };
 159 }
 160
 161 static inline void gmx_simdcall simdIncr(SimdFloat*& p, SimdFloatTag)
 162 {
 163     p = reinterpret_cast<SimdFloat*>(reinterpret_cast<uint64_t>(p) + svcntw());
 164 }
 165
 166 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 167 {
 168     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 169     svbool_t pg = svptrue_b32();
 170     return { svld1_s32(pg, m) };
 171 }
 172
 173 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 174 {
 175     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 176     svbool_t pg = svptrue_b32();
 177     svst1_s32(pg, m, a.simdInternal_);
 178 }
 179
 180 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
 181 {
 182     svbool_t pg = svptrue_b32();
 183     return { svld1_s32(pg, m) };
 184 }
 185
 186 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
 187 {
 188     svbool_t pg = svptrue_b32();
 189     svst1_s32(pg, m, a.simdInternal_);
 190 }
 191
 192 static inline SimdFInt32 gmx_simdcall setZeroFI()
 193 {
 194     return { svdup_s32(0) };
 195 }
 196
 197 template<int index>
 198 gmx_simdcall static inline std::int32_t extract(SimdFInt32 a)
 199 {
 200     svbool_t pg = svwhilelt_b32(0, index);
 201     return svlasta_s32(pg, a.simdInternal_);
 202 }
 203
 204 template<int index>
 205 gmx_simdcall static inline float extract(SimdFloat a)
 206 {
 207     svbool_t pg = svwhilelt_b32(0, index);
 208     return svlasta_f32(pg, a.simdInternal_);
 209 }
 210
 211 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 212 {
 213     svbool_t pg = svptrue_b32();
 214     return { svreinterpret_f32_s32(svand_s32_x(pg, svreinterpret_s32_f32(a.simdInternal_),
 215                                                svreinterpret_s32_f32(b.simdInternal_))) };
 216 }
 217
 218 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 219 {
 220     svbool_t pg = svptrue_b32();
 221     return { svreinterpret_f32_s32(svbic_s32_x(pg, svreinterpret_s32_f32(b.simdInternal_),
 222                                                svreinterpret_s32_f32(a.simdInternal_))) };
 223 }
 224
 225 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 226 {
 227     svbool_t pg = svptrue_b32();
 228     return { svreinterpret_f32_s32(svorr_s32_x(pg, svreinterpret_s32_f32(a.simdInternal_),
 229                                                svreinterpret_s32_f32(b.simdInternal_))) };
 230 }
 231
 232 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 233 {
 234     svbool_t pg = svptrue_b32();
 235     return { svreinterpret_f32_s32(sveor_s32_x(pg, svreinterpret_s32_f32(a.simdInternal_),
 236                                                svreinterpret_s32_f32(b.simdInternal_))) };
 237 }
 238
 239 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 240 {
 241     svbool_t pg = svptrue_b32();
 242     return { svadd_f32_x(pg, a.simdInternal_, b.simdInternal_) };
 243 }
 244
 245 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 246 {
 247     svbool_t pg = svptrue_b32();
 248     return { svsub_f32_x(pg, a.simdInternal_, b.simdInternal_) };
 249 }
 250
 251 static inline SimdFloat gmx_simdcall operator-(SimdFloat a)
 252 {
 253     svbool_t pg = svptrue_b32();
 254     return { svneg_f32_x(pg, a.simdInternal_) };
 255 }
 256
 257 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 258 {
 259     svbool_t pg = svptrue_b32();
 260     return { svmul_f32_x(pg, a.simdInternal_, b.simdInternal_) };
 261 }
 262
 263 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 264 {
 265     svbool_t pg = svptrue_b32();
 266     return { svmad_f32_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 267 }
 268
 269 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 270 {
 271     svbool_t pg = svptrue_b32();
 272     return { svnmsb_f32_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 273 }
 274
 275 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 276 {
 277     svbool_t pg = svptrue_b32();
 278     return { svmsb_f32_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 279 }
 280
 281 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 282 {
 283     svbool_t pg = svptrue_b32();
 284     return { svnmad_f32_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 285 }
 286
 287 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 288 {
 289     return { svrsqrte_f32(x.simdInternal_) };
 290 }
 291
 292 // The SIMD implementation seems to overflow when we square lu for
 293 // values close to FLOAT_MAX, so we fall back on the version in
 294 // simd_math.h, which is probably slightly slower.
 295 #if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_FLOAT
 296 static inline SimdFloat gmx_simdcall rsqrtIter(SimdFloat lu, SimdFloat x)
 297 {
 298     svbool_t    pg = svptrue_b32();
 299     svfloat32_t tmp1, tmp2;
 300     tmp1 = svmul_f32_x(pg, x.simdInternal_, lu.simdInternal_);
 301     tmp2 = svmul_n_f32_x(pg, lu.simdInternal_, -0.5f);
 302     tmp1 = svmad_n_f32_x(pg, tmp1, lu.simdInternal_, -3.0f);
 303     return { svmul_f32_x(pg, tmp1, tmp2) };
 304 }
 305
 306 #endif
 307
 308 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 309 {
 310     return { svrecpe_f32(x.simdInternal_) };
 311 }
 312
 313 static inline SimdFloat gmx_simdcall rcpIter(SimdFloat lu, SimdFloat x)
 314 {
 315     svbool_t pg = svptrue_b32();
 316     return { svmul_f32_x(pg, lu.simdInternal_, svrecps_f32(lu.simdInternal_, x.simdInternal_)) };
 317 }
 318
 319 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 320 {
 321     svbool_t pg = svcmpne_n_u32(svptrue_b32(), m.simdInternal_, 0);
 322     return { svadd_f32_m(pg, a.simdInternal_, b.simdInternal_) };
 323 }
 324
 325 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 326 {
 327     svbool_t pg = svcmpne_n_u32(svptrue_b32(), m.simdInternal_, 0);
 328     return { svmul_f32_z(pg, a.simdInternal_, b.simdInternal_) };
 329 }
 330
 331 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 332 {
 333     svbool_t pg = svcmpne_n_u32(svptrue_b32(), m.simdInternal_, 0);
 334     return { svmad_f32_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 335 }
 336
 337 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 338 {
 339     svbool_t pg = svcmpne_n_u32(svptrue_b32(), m.simdInternal_, 0);
 340     // The result will always be correct since we mask the result with m, but
 341     // for debug builds we also want to make sure not to generate FP exceptions
 342 #ifndef NDEBUG
 343     x.simdInternal_ = svsel_f32(pg, x.simdInternal_, svdup_n_f32(1.0f));
 344 #endif
 345     return { svreinterpret_f32_u32(
 346             svand_n_u32_z(pg, svreinterpret_u32_f32(svrsqrte_f32(x.simdInternal_)), 0xFFFFFFFF)) };
 347 }
 348
 349 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 350 {
 351     svbool_t pg = svcmpne_n_u32(svptrue_b32(), m.simdInternal_, 0);
 352     // The result will always be correct since we mask the result with m, but
 353     // for debug builds we also want to make sure not to generate FP exceptions
 354 #ifndef NDEBUG
 355     x.simdInternal_ = svsel_f32(pg, x.simdInternal_, svdup_n_f32(1.0f));
 356 #endif
 357     return { svreinterpret_f32_u32(
 358             svand_n_u32_z(pg, svreinterpret_u32_f32(svrecpe_f32(x.simdInternal_)), 0xFFFFFFFF)) };
 359 }
 360
 361 static inline SimdFloat gmx_simdcall abs(SimdFloat x)
 362 {
 363     svbool_t pg = svptrue_b32();
 364     return { svabs_f32_x(pg, x.simdInternal_) };
 365 }
 366
 367 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 368 {
 369     svbool_t pg = svptrue_b32();
 370     return { svmax_f32_x(pg, a.simdInternal_, b.simdInternal_) };
 371 }
 372
 373 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 374 {
 375     svbool_t pg = svptrue_b32();
 376     return { svmin_f32_x(pg, a.simdInternal_, b.simdInternal_) };
 377 }
 378
 379 // Round and trunc operations are defined at the end of this file, since they
 380 // need to use float-to-integer and integer-to-float conversions.
 381
 382 template<MathOptimization opt = MathOptimization::Safe>
 383 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 384 {
 385     svbool_t        pg           = svptrue_b32();
 386     const svint32_t exponentMask = svdup_n_s32(0x7F800000);
 387     const svint32_t mantissaMask = svdup_n_s32(0x807FFFFF);
 388     const svint32_t exponentBias = svdup_n_s32(126); // add 1 to make our definition identical to frexp()
 389     const svfloat32_t half = svdup_n_f32(0.5f);
 390     svint32_t         iExponent;
 391
 392     iExponent = svand_s32_x(pg, svreinterpret_s32_f32(value.simdInternal_), exponentMask);
 393     iExponent = svsub_s32_x(
 394             pg, svreinterpret_s32_u32(svlsr_n_u32_x(pg, svreinterpret_u32_s32(iExponent), 23)), exponentBias);
 395     exponent->simdInternal_ = iExponent;
 396
 397     return { svreinterpret_f32_s32(svorr_s32_x(
 398             pg, svand_s32_x(pg, svreinterpret_s32_f32(value.simdInternal_), mantissaMask),
 399             svreinterpret_s32_f32(half))) };
 400 }
 401
 402 template<MathOptimization opt = MathOptimization::Safe>
 403 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 404 {
 405     svbool_t        pg           = svptrue_b32();
 406     const svint32_t exponentBias = svdup_n_s32(127);
 407     svint32_t       iExponent    = svadd_s32_x(pg, exponent.simdInternal_, exponentBias);
 408
 409     if (opt == MathOptimization::Safe)
 410     {
 411         // Make sure biased argument is not negative
 412         iExponent = svmax_n_s32_x(pg, iExponent, 0);
 413     }
 414
 415     iExponent = svlsl_n_s32_x(pg, iExponent, 23);
 416
 417     return { svmul_f32_x(pg, value.simdInternal_, svreinterpret_f32_s32(iExponent)) };
 418 }
 419
 420 static inline float gmx_simdcall reduce(SimdFloat a)
 421 {
 422     svbool_t pg = svptrue_b32();
 423     return svaddv_f32(pg, a.simdInternal_);
 424 }
 425
 426 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 427 {
 428     svbool_t pg = svptrue_b32();
 429     return { svcmpeq_f32(pg, a.simdInternal_, b.simdInternal_) };
 430 }
 431
 432 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 433 {
 434     svbool_t pg = svptrue_b32();
 435     return { svcmpne_f32(pg, a.simdInternal_, b.simdInternal_) };
 436 }
 437
 438 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 439 {
 440     svbool_t pg = svptrue_b32();
 441     return { svcmplt_f32(pg, a.simdInternal_, b.simdInternal_) };
 442 }
 443
 444 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 445 {
 446     svbool_t pg = svptrue_b32();
 447     return { svcmple_f32(pg, a.simdInternal_, b.simdInternal_) };
 448 }
 449
 450 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 451 {
 452     svbool_t pg = svptrue_b32();
 453     return { svcmpne_n_s32(pg, svreinterpret_s32_f32(a.simdInternal_), 0) };
 454 }
 455
 456 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 457 {
 458     svbool_t pg = svptrue_b32();
 459     return { svand_u32_x(pg, a.simdInternal_, b.simdInternal_) };
 460 }
 461
 462 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
 463 {
 464     svbool_t pg = svptrue_b32();
 465     return { svorr_u32_x(pg, a.simdInternal_, b.simdInternal_) };
 466 }
 467
 468 static inline bool gmx_simdcall anyTrue(SimdFBool a)
 469 {
 470     svbool_t pg = svptrue_b32();
 471     return svptest_any(pg, svcmpne_n_u32(svptrue_b32(), a.simdInternal_, 0));
 472 }
 473
 474 static inline bool gmx_simdcall extractFirst(SimdFBool a)
 475 {
 476     svbool_t pg = svptrue_b32();
 477     return svptest_first(pg, svcmpne_n_u32(svptrue_b32(), a.simdInternal_, 0));
 478 }
 479
 480 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool m)
 481 {
 482     svbool_t pg = svptrue_b32();
 483     return { svreinterpret_f32_u32(svand_u32_x(pg, svreinterpret_u32_f32(a.simdInternal_), m.simdInternal_)) };
 484 }
 485
 486 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool m)
 487 {
 488     svbool_t pg = svcmpeq_n_u32(svptrue_b32(), m.simdInternal_, 0);
 489     return { svsel_f32(pg, a.simdInternal_, svdup_f32(0.0f)) };
 490 }
 491
 492 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
 493 {
 494     svbool_t pg = svcmpne_n_u32(svptrue_b32(), sel.simdInternal_, 0);
 495     return { svsel_f32(pg, b.simdInternal_, a.simdInternal_) };
 496 }
 497
 498 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
 499 {
 500     svbool_t pg = svptrue_b32();
 501     return { svand_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 502 }
 503
 504 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
 505 {
 506     svbool_t pg = svptrue_b32();
 507     return { svbic_s32_x(pg, b.simdInternal_, a.simdInternal_) };
 508 }
 509
 510 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
 511 {
 512     svbool_t pg = svptrue_b32();
 513     return { svorr_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 514 }
 515
 516 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
 517 {
 518     svbool_t pg = svptrue_b32();
 519     return { sveor_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 520 }
 521
 522 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
 523 {
 524     svbool_t pg = svptrue_b32();
 525     return { svadd_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 526 }
 527
 528 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
 529 {
 530     svbool_t pg = svptrue_b32();
 531     return { svsub_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 532 }
 533
 534 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
 535 {
 536     svbool_t pg = svptrue_b32();
 537     return { svmul_s32_x(pg, a.simdInternal_, b.simdInternal_) };
 538 }
 539
 540 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
 541 {
 542     svbool_t pg = svptrue_b32();
 543     return { svcmpeq_s32(pg, a.simdInternal_, b.simdInternal_) };
 544 }
 545
 546 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
 547 {
 548     svbool_t pg = svptrue_b32();
 549     return { svcmpne_n_s32(pg, a.simdInternal_, (int32_t)0) };
 550 }
 551
 552 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
 553 {
 554     svbool_t pg = svptrue_b32();
 555     return { svcmplt_s32(pg, a.simdInternal_, b.simdInternal_) };
 556 }
 557
 558 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
 559 {
 560     svbool_t pg = svptrue_b32();
 561     return { svand_u32_x(pg, a.simdInternal_, b.simdInternal_) };
 562 }
 563
 564 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
 565 {
 566     svbool_t pg = svptrue_b32();
 567     return { svorr_u32_x(pg, a.simdInternal_, b.simdInternal_) };
 568 }
 569
 570 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
 571 {
 572     svbool_t pg = svptrue_b32();
 573     return svptest_any(pg, svcmpne_n_u32(pg, a.simdInternal_, 0));
 574 }
 575
 576 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool m)
 577 {
 578     svbool_t pg = svptrue_b32();
 579     return { svand_s32_x(pg, a.simdInternal_, svreinterpret_s32_u32(m.simdInternal_)) };
 580 }
 581
 582 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool m)
 583 {
 584     svbool_t pg = svcmpeq_n_u32(svptrue_b32(), m.simdInternal_, 0);
 585     return { svadd_n_s32_z(pg, a.simdInternal_, 0) };
 586 }
 587
 588 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 589 {
 590     svbool_t pg = svcmpne_n_u32(svptrue_b32(), sel.simdInternal_, 0);
 591     return { svsel_s32(pg, b.simdInternal_, a.simdInternal_) };
 592 }
 593
 594 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
 595 {
 596     svbool_t pg = svptrue_b32();
 597     return { svcvt_s32_x(pg, svrinta_f32_x(pg, a.simdInternal_)) };
 598 }
 599
 600 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
 601 {
 602     svbool_t pg = svptrue_b32();
 603     return { svcvt_s32_x(pg, a.simdInternal_) };
 604 }
 605
 606 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
 607 {
 608     svbool_t pg = svptrue_b32();
 609     return { svcvt_f32_x(pg, a.simdInternal_) };
 610 }
 611
 612 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
 613 {
 614     return { a.simdInternal_ };
 615 }
 616
 617 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
 618 {
 619     return { a.simdInternal_ };
 620 }
 621
 622 static inline SimdFloat gmx_simdcall round(SimdFloat x)
 623 {
 624     svbool_t pg = svptrue_b32();
 625     return { svrinta_f32_x(pg, x.simdInternal_) };
 626 }
 627
 628 static inline SimdFloat gmx_simdcall trunc(SimdFloat x)
 629 {
 630     return cvtI2R(cvttR2I(x));
 631 }
 632
 633 } // namespace gmx
 634
 635 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_FLOAT_H