src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  38 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  39
  40 #include "config.h"
  41
  42 #include "gromacs/math/utilities.h"
  43 #include "gromacs/utility/basedefinitions.h"
  44
  45 #include "impl_ibm_vsx_definitions.h"
  46
  47 namespace gmx
  48 {
  49
  50 class SimdFloat
  51 {
  52 public:
  53     SimdFloat() {}
  54
  55     // gcc-4.9 does not recognize that we use the parameter
  56     SimdFloat(float gmx_unused f) : simdInternal_(vec_splats(f)) {}
  57
  58     // Internal utility constructor to simplify return statements
  59     SimdFloat(__vector float simd) : simdInternal_(simd) {}
  60
  61     __vector float simdInternal_;
  62 };
  63
  64 class SimdFInt32
  65 {
  66 public:
  67     SimdFInt32() {}
  68
  69     // gcc-4.9 does not recognize that we use the parameter
  70     SimdFInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  71
  72     // Internal utility constructor to simplify return statements
  73     SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}
  74
  75     __vector signed int simdInternal_;
  76 };
  77
  78 class SimdFBool
  79 {
  80 public:
  81     SimdFBool() {}
  82
  83     SimdFBool(bool b) :
  84         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
  85     {
  86     }
  87
  88     // Internal utility constructor to simplify return statements
  89     SimdFBool(__vector vsxBool int simd) : simdInternal_(simd) {}
  90
  91     __vector vsxBool int simdInternal_;
  92 };
  93
  94 class SimdFIBool
  95 {
  96 public:
  97     SimdFIBool() {}
  98
  99     SimdFIBool(bool b) :
 100         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
 101     {
 102     }
 103
 104     // Internal utility constructor to simplify return statements
 105     SimdFIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
 106
 107     __vector vsxBool int simdInternal_;
 108 };
 109
 110 // Note that the interfaces we use here have been a mess in xlc;
 111 // currently version 13.1.5 is required.
 112
 113 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 114 {
 115     return { *reinterpret_cast<const __vector float*>(m) };
 116 }
 117
 118 static inline void gmx_simdcall store(float* m, SimdFloat a)
 119 {
 120     *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
 121 }
 122
 123 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
 124 {
 125     return
 126     {
 127 #if __GNUC__ < 7
 128         *reinterpret_cast<const __vector float*>(m)
 129 #else
 130         vec_xl(0, m)
 131 #endif
 132     };
 133 }
 134
 135 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
 136 {
 137 #if __GNUC__ < 7
 138     *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
 139 #else
 140     vec_xst(a.simdInternal_, 0, m);
 141 #endif
 142 }
 143
 144 static inline SimdFloat gmx_simdcall setZeroF()
 145 {
 146     return { vec_splats(0.0F) };
 147 }
 148
 149 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 150 {
 151     return { *reinterpret_cast<const __vector int*>(m) };
 152 }
 153
 154 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 155 {
 156     *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
 157 }
 158
 159 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
 160 {
 161     return
 162     {
 163 #if __GNUC__ < 7
 164         *reinterpret_cast<const __vector int*>(m)
 165 #else
 166         vec_xl(0, m)
 167 #endif
 168     };
 169 }
 170
 171 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
 172 {
 173 #if __GNUC__ < 7
 174     *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
 175 #else
 176     vec_xst(a.simdInternal_, 0, m);
 177 #endif
 178 }
 179
 180 static inline SimdFInt32 gmx_simdcall setZeroFI()
 181 {
 182     return { vec_splats(static_cast<int>(0)) };
 183 }
 184
 185 // gcc-4.9 does not detect that vec_extract() uses its argument
 186 template<int index>
 187 static inline std::int32_t gmx_simdcall extract(SimdFInt32 gmx_unused a)
 188 {
 189     return vec_extract(a.simdInternal_, index);
 190 }
 191
 192 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 193 {
 194     return { vec_and(a.simdInternal_, b.simdInternal_) };
 195 }
 196
 197 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 198 {
 199     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 200 }
 201
 202 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 203 {
 204     return { vec_or(a.simdInternal_, b.simdInternal_) };
 205 }
 206
 207 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 208 {
 209     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 210 }
 211
 212 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 213 {
 214     return { vec_add(a.simdInternal_, b.simdInternal_) };
 215 }
 216
 217 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 218 {
 219     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 220 }
 221
 222 static inline SimdFloat gmx_simdcall operator-(SimdFloat x)
 223 {
 224     return { -x.simdInternal_ };
 225 }
 226
 227 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 228 {
 229     return { vec_mul(a.simdInternal_, b.simdInternal_) };
 230 }
 231
 232 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 233 {
 234     return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 235 }
 236
 237 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 238 {
 239     return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 240 }
 241
 242 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 243 {
 244     return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 245 }
 246
 247 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 248 {
 249     return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 250 }
 251
 252 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 253 {
 254     return { vec_rsqrte(x.simdInternal_) };
 255 }
 256
 257 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 258 {
 259     return { vec_re(x.simdInternal_) };
 260 }
 261
 262 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 263 {
 264     return { vec_add(a.simdInternal_,
 265                      vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))) };
 266 }
 267
 268 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 269 {
 270     SimdFloat prod = a * b;
 271
 272     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 273 }
 274
 275 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 276 {
 277     SimdFloat prod = fma(a, b, c);
 278
 279     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 280 }
 281
 282 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 283 {
 284 #ifndef NDEBUG
 285     x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
 286 #endif
 287     return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 288 }
 289
 290 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 291 {
 292 #ifndef NDEBUG
 293     x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
 294 #endif
 295     return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 296 }
 297
 298 static inline SimdFloat gmx_simdcall abs(SimdFloat x)
 299 {
 300     return { vec_abs(x.simdInternal_) };
 301 }
 302
 303 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 304 {
 305     return { vec_max(a.simdInternal_, b.simdInternal_) };
 306 }
 307
 308 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 309 {
 310     return { vec_min(a.simdInternal_, b.simdInternal_) };
 311 }
 312
 313 static inline SimdFloat gmx_simdcall round(SimdFloat x)
 314 {
 315     return { vec_round(x.simdInternal_) };
 316 }
 317
 318 static inline SimdFloat gmx_simdcall trunc(SimdFloat x)
 319 {
 320     return { vec_trunc(x.simdInternal_) };
 321 }
 322
 323 template<MathOptimization opt = MathOptimization::Safe>
 324 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 325 {
 326     const __vector float exponentMask = reinterpret_cast<__vector float>(vec_splats(0x7F800000U));
 327     const __vector signed int exponentBias = vec_splats(126);
 328     const __vector float      half         = vec_splats(0.5F);
 329     __vector signed int       iExponent;
 330
 331     __vector vsxBool int valueIsZero =
 332             vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector float>(vec_splats(0.0)));
 333
 334     iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 335     iExponent = vec_sub(vec_sr(iExponent, vec_splats(23U)), exponentBias);
 336     iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero));
 337
 338     __vector float result = vec_or(vec_andc(value.simdInternal_, exponentMask), half);
 339     result                = vec_sel(result, value.simdInternal_, valueIsZero);
 340
 341     exponent->simdInternal_ = iExponent;
 342
 343     return { result };
 344 }
 345
 346 template<MathOptimization opt = MathOptimization::Safe>
 347 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 348 {
 349     const __vector signed int exponentBias = vec_splats(127);
 350     __vector signed int       iExponent;
 351
 352     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 353
 354     if (opt == MathOptimization::Safe)
 355     {
 356         // Make sure biased argument is not negative
 357         iExponent = vec_max(iExponent, vec_splat_s32(0));
 358     }
 359
 360     iExponent = vec_sl(iExponent, vec_splats(23U));
 361
 362     return { vec_mul(value.simdInternal_, reinterpret_cast<__vector float>(iExponent)) };
 363 }
 364
 365 static inline float gmx_simdcall reduce(SimdFloat x)
 366 {
 367     const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 368     const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
 369
 370     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm1));
 371     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm2));
 372     return vec_extract(x.simdInternal_, 0);
 373 }
 374
 375 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 376 {
 377     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 378 }
 379
 380 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 381 {
 382     return { vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
 383                     vec_cmplt(a.simdInternal_, b.simdInternal_)) };
 384 }
 385
 386 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 387 {
 388     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 389 }
 390
 391 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 392 {
 393     return { vec_cmple(a.simdInternal_, b.simdInternal_) };
 394 }
 395
 396 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 397 {
 398     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 399 }
 400
 401 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 402 {
 403     return { vec_and(a.simdInternal_, b.simdInternal_) };
 404 }
 405
 406 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
 407 {
 408     return { vec_or(a.simdInternal_, b.simdInternal_) };
 409 }
 410
 411 static inline bool gmx_simdcall anyTrue(SimdFBool a)
 412 {
 413     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 414 }
 415
 416 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool m)
 417 {
 418     return { vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 419 }
 420
 421 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool m)
 422 {
 423     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 424 }
 425
 426 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
 427 {
 428     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 429 }
 430
 431 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
 432 {
 433     return { vec_and(a.simdInternal_, b.simdInternal_) };
 434 }
 435
 436 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
 437 {
 438     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 439 }
 440
 441 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
 442 {
 443     return { vec_or(a.simdInternal_, b.simdInternal_) };
 444 }
 445
 446 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
 447 {
 448     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 449 }
 450
 451 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
 452 {
 453     return { vec_add(a.simdInternal_, b.simdInternal_) };
 454 }
 455
 456 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
 457 {
 458     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 459 }
 460
 461 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
 462 {
 463     return { a.simdInternal_ * b.simdInternal_ };
 464 }
 465
 466 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
 467 {
 468     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 469 }
 470
 471 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
 472 {
 473     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 474 }
 475
 476 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
 477 {
 478     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 479 }
 480
 481 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
 482 {
 483     return { vec_and(a.simdInternal_, b.simdInternal_) };
 484 }
 485
 486 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
 487 {
 488     return { vec_or(a.simdInternal_, b.simdInternal_) };
 489 }
 490
 491 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
 492 {
 493     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 494 }
 495
 496 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool m)
 497 {
 498     return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 499 }
 500
 501 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool m)
 502 {
 503     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 504 }
 505
 506 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 507 {
 508     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 509 }
 510
 511 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
 512 {
 513     return { vec_cts(vec_round(a.simdInternal_), 0) };
 514 }
 515
 516 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
 517 {
 518     return { vec_cts(a.simdInternal_, 0) };
 519 }
 520
 521 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
 522 {
 523     return { vec_ctf(a.simdInternal_, 0) };
 524 }
 525
 526 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
 527 {
 528     return { a.simdInternal_ };
 529 }
 530
 531 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
 532 {
 533     return { a.simdInternal_ };
 534 }
 535
 536 static inline SimdFloat gmx_simdcall copysign(SimdFloat x, SimdFloat y)
 537 {
 538 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 539     __vector float res;
 540     __asm__("xvcpsgnsp %x0,%x1,%x2" : "=wf"(res) : "wf"(y.simdInternal_), "wf"(x.simdInternal_));
 541     return { res };
 542 #else
 543     return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
 544 #endif
 545 }
 546
 547 } // namespace gmx
 548
 549 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H