src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  38 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  39
  40 #include "config.h"
  41
  42 #include "gromacs/math/utilities.h"
  43 #include "gromacs/utility/basedefinitions.h"
  44
  45 #include "impl_ibm_vsx_definitions.h"
  46
  47 namespace gmx
  48 {
  49
  50 class SimdDouble
  51 {
  52 public:
  53     SimdDouble() {}
  54
  55     // gcc-4.9 does not recognize that we use the parameter
  56     SimdDouble(double gmx_unused d) : simdInternal_(vec_splats(d)) {}
  57
  58     // Internal utility constructor to simplify return statements
  59     SimdDouble(__vector double simd) : simdInternal_(simd) {}
  60
  61     __vector double simdInternal_;
  62 };
  63
  64 class SimdDInt32
  65 {
  66 public:
  67     SimdDInt32() {}
  68
  69     // gcc-4.9 does not recognize that we use the parameter
  70     SimdDInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  71
  72     // Internal utility constructor to simplify return statements
  73     SimdDInt32(__vector signed int simd) : simdInternal_(simd) {}
  74
  75     __vector signed int simdInternal_;
  76 };
  77
  78 class SimdDBool
  79 {
  80 public:
  81     SimdDBool() {}
  82
  83     SimdDBool(bool b) :
  84         simdInternal_(reinterpret_cast<__vector vsxBool long long>(vec_splats(b ? 0xFFFFFFFFFFFFFFFFULL : 0)))
  85     {
  86     }
  87
  88     // Internal utility constructor to simplify return statements
  89     SimdDBool(__vector vsxBool long long simd) : simdInternal_(simd) {}
  90
  91     __vector vsxBool long long simdInternal_;
  92 };
  93
  94 class SimdDIBool
  95 {
  96 public:
  97     SimdDIBool() {}
  98
  99     SimdDIBool(bool b) :
 100         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
 101     {
 102     }
 103
 104     // Internal utility constructor to simplify return statements
 105     SimdDIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
 106
 107     __vector vsxBool int simdInternal_;
 108 };
 109
 110 // Note that the interfaces we use here have been a mess in xlc;
 111 // currently version 13.1.5 is required.
 112
 113 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 114 {
 115     return
 116     {
 117 #if defined(__ibmxl__)
 118         vec_ld(0, m)
 119 #else
 120 #    if __GNUC__ < 7
 121         *reinterpret_cast<const __vector double*>(m)
 122 #    else
 123         vec_vsx_ld(0, m)
 124 #    endif
 125 #endif
 126     };
 127 }
 128
 129 static inline void gmx_simdcall store(double* m, SimdDouble a)
 130 {
 131 #if defined(__ibmxl__)
 132     vec_st(a.simdInternal_, 0, m);
 133 #else
 134 #    if __GNUC__ < 7
 135     *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
 136 #    else
 137     vec_vsx_st(a.simdInternal_, 0, m);
 138 #    endif
 139 #endif
 140 }
 141
 142 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 143 {
 144     return
 145     {
 146 #if defined(__ibmxl__)
 147         vec_xl(0, m)
 148 #else
 149 #    if __GNUC__ < 7
 150         *reinterpret_cast<const __vector double*>(m)
 151 #    else
 152         vec_vsx_ld(0, m)
 153 #    endif
 154 #endif
 155     };
 156 }
 157
 158 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 159 {
 160 #if defined(__ibmxl__)
 161     vec_xst(a.simdInternal_, 0, m);
 162 #else
 163 #    if __GNUC__ < 7
 164     *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
 165 #    else
 166     vec_vsx_st(a.simdInternal_, 0, m);
 167 #    endif
 168 #endif
 169 }
 170
 171 static inline SimdDouble gmx_simdcall setZeroD()
 172 {
 173     return { vec_splats(0.0) };
 174 }
 175
 176 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 177 {
 178     __vector signed int          t0, t1;
 179     const __vector unsigned char perm = { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
 180     t0                                = vec_splats(m[0]);
 181     t1                                = vec_splats(m[1]);
 182     return { vec_perm(t0, t1, perm) };
 183 }
 184
 185 // gcc-4.9 does not understand that arguments to vec_extract() are used
 186 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 gmx_unused x)
 187 {
 188     m[0] = vec_extract(x.simdInternal_, 0);
 189     m[1] = vec_extract(x.simdInternal_, 2);
 190 }
 191
 192 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 193 {
 194     return simdLoad(m, SimdDInt32Tag());
 195 }
 196
 197 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 198 {
 199     return store(m, a);
 200 }
 201
 202 static inline SimdDInt32 gmx_simdcall setZeroDI()
 203 {
 204     return { vec_splats(static_cast<int>(0)) };
 205 }
 206
 207 // gcc-4.9 does not detect that vec_extract() uses its argument
 208 template<int index>
 209 static inline std::int32_t gmx_simdcall extract(SimdDInt32 gmx_unused a)
 210 {
 211     return vec_extract(a.simdInternal_, 2 * index);
 212 }
 213
 214 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 215 {
 216     return { vec_and(a.simdInternal_, b.simdInternal_) };
 217 }
 218
 219 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 220 {
 221     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 222 }
 223
 224 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 225 {
 226     return { vec_or(a.simdInternal_, b.simdInternal_) };
 227 }
 228
 229 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 230 {
 231     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 232 }
 233
 234 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 235 {
 236     return { vec_add(a.simdInternal_, b.simdInternal_) };
 237 }
 238
 239 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 240 {
 241     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 242 }
 243
 244 static inline SimdDouble gmx_simdcall operator-(SimdDouble x)
 245 {
 246     return { -x.simdInternal_ };
 247 }
 248
 249 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 250 {
 251     return { vec_mul(a.simdInternal_, b.simdInternal_) };
 252 }
 253
 254 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 255 {
 256     return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 257 }
 258
 259 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 260 {
 261     return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 262 }
 263
 264 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 265 {
 266     return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 267 }
 268
 269 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 270 {
 271     return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 272 }
 273
 274 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 275 {
 276     return { vec_rsqrte(x.simdInternal_) };
 277 }
 278
 279 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 280 {
 281     return { vec_re(x.simdInternal_) };
 282 }
 283
 284 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 285 {
 286     return { vec_add(a.simdInternal_,
 287                      vec_and(b.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))) };
 288 }
 289
 290 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 291 {
 292     SimdDouble prod = a * b;
 293
 294     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 295 }
 296
 297 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 298 {
 299     SimdDouble prod = fma(a, b, c);
 300
 301     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 302 }
 303
 304 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 305 {
 306 #ifndef NDEBUG
 307     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 308 #endif
 309     return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
 310 }
 311
 312 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 313 {
 314 #ifndef NDEBUG
 315     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 316 #endif
 317     return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
 318 }
 319
 320 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
 321 {
 322     return { vec_abs(x.simdInternal_) };
 323 }
 324
 325 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 326 {
 327     return { vec_max(a.simdInternal_, b.simdInternal_) };
 328 }
 329
 330 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 331 {
 332     return { vec_min(a.simdInternal_, b.simdInternal_) };
 333 }
 334
 335 static inline SimdDouble gmx_simdcall round(SimdDouble x)
 336 {
 337 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 338     // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
 339     __vector double res;
 340     __asm__("xvrdpi %x0,%x1" : "=wd"(res) : "wd"(x.simdInternal_));
 341     return { res };
 342 #else
 343     return { vec_round(x.simdInternal_) };
 344 #endif
 345 }
 346
 347 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
 348 {
 349     return { vec_trunc(x.simdInternal_) };
 350 }
 351
 352 template<MathOptimization opt = MathOptimization::Safe>
 353 static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent)
 354 {
 355     const __vector double exponentMask =
 356             reinterpret_cast<__vector double>(vec_splats(0x7FF0000000000000ULL));
 357     const __vector signed int exponentBias = vec_splats(1022);
 358     const __vector double     half         = vec_splats(0.5);
 359     __vector signed int       iExponent;
 360
 361     __vector vsxBool long long valueIsZero =
 362             vec_cmpeq(value.simdInternal_, reinterpret_cast<__vector double>(vec_splats(0.0)));
 363
 364     iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 365     // The data is in the upper half of each double (corresponding to elements 1 and 3).
 366     // First shift 52-32=20bits, and then permute to swap element 0 with 1 and element 2 with 3
 367     // For big endian they are in opposite order, so then we simply skip the swap.
 368     iExponent = vec_sr(iExponent, vec_splats(20U));
 369 #ifndef __BIG_ENDIAN__
 370     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 371     iExponent                         = vec_perm(iExponent, iExponent, perm);
 372 #endif
 373     iExponent = vec_sub(iExponent, exponentBias);
 374     iExponent = vec_andc(iExponent, reinterpret_cast<__vector int>(valueIsZero));
 375
 376     __vector double result = vec_or(vec_andc(value.simdInternal_, exponentMask), half);
 377     result                 = vec_sel(result, value.simdInternal_, valueIsZero);
 378
 379     exponent->simdInternal_ = iExponent;
 380
 381     return { result };
 382 }
 383
 384 template<MathOptimization opt = MathOptimization::Safe>
 385 static inline SimdDouble ldexp(SimdDouble value, SimdDInt32 exponent)
 386 {
 387     const __vector signed int exponentBias = vec_splats(1023);
 388     __vector signed int       iExponent;
 389 #ifdef __BIG_ENDIAN__
 390     const __vector unsigned char perm = { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19 };
 391 #else
 392     const __vector unsigned char perm = { 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11 };
 393 #endif
 394
 395     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 396
 397     if (opt == MathOptimization::Safe)
 398     {
 399         // Make sure biased argument is not negative
 400         iExponent = vec_max(iExponent, vec_splat_s32(0));
 401     }
 402
 403     // exponent is now present in pairs of integers; 0011.
 404     // Elements 0/2 already correspond to the upper half of each double,
 405     // so we only need to shift by another 52-32=20 bits.
 406     // The remaining elements are set to zero.
 407     iExponent = vec_sl(iExponent, vec_splats(20U));
 408     iExponent = vec_perm(iExponent, vec_splats(0), perm);
 409
 410     return { vec_mul(value.simdInternal_, reinterpret_cast<__vector double>(iExponent)) };
 411 }
 412
 413 static inline double gmx_simdcall reduce(SimdDouble x)
 414 {
 415     const __vector unsigned char perm = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 416 #ifdef __xlC__
 417     /* old xlc version 12 does not understand vec_perm() with double arguments */
 418     x.simdInternal_ = vec_add(
 419             x.simdInternal_, reinterpret_cast<__vector double>(vec_perm(
 420                                      reinterpret_cast<__vector signed int>(x.simdInternal_),
 421                                      reinterpret_cast<__vector signed int>(x.simdInternal_), perm)));
 422 #else
 423     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm));
 424 #endif
 425     return vec_extract(x.simdInternal_, 0);
 426 }
 427
 428 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 429 {
 430     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 431 }
 432
 433 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 434 {
 435     return { reinterpret_cast<__vector vsxBool long long>(vec_or(
 436             reinterpret_cast<__vector signed int>(vec_cmpgt(a.simdInternal_, b.simdInternal_)),
 437             reinterpret_cast<__vector signed int>(vec_cmplt(a.simdInternal_, b.simdInternal_)))) };
 438 }
 439
 440 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 441 {
 442     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 443 }
 444
 445 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 446 {
 447     return { vec_cmple(a.simdInternal_, b.simdInternal_) };
 448 }
 449
 450 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 451 {
 452 #ifdef __POWER8_VECTOR__
 453     // Power8 VSX has proper support for operations on long long integers
 454     return { vec_cmpgt(reinterpret_cast<__vector unsigned long long>(a.simdInternal_), vec_splats(0ULL)) };
 455 #else
 456     // No support for long long operations.
 457     // Start with comparing 32-bit subfields bitwise by casting to integers
 458     __vector vsxBool int tmp =
 459             vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U));
 460
 461     // Shuffle low/high 32-bit fields of tmp into tmp2
 462     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 463     __vector vsxBool int tmp2 = vec_perm(tmp, tmp, perm);
 464
 465     // Return the or:d parts of tmp & tmp2
 466     return { reinterpret_cast<__vector vsxBool long long>(vec_or(tmp, tmp2)) };
 467 #endif
 468 }
 469
 470 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
 471 {
 472     return { reinterpret_cast<__vector vsxBool long long>(
 473             vec_and(reinterpret_cast<__vector signed int>(a.simdInternal_),
 474                     reinterpret_cast<__vector signed int>(b.simdInternal_))) };
 475 }
 476
 477 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
 478 {
 479     return { reinterpret_cast<__vector vsxBool long long>(
 480             vec_or(reinterpret_cast<__vector signed int>(a.simdInternal_),
 481                    reinterpret_cast<__vector signed int>(b.simdInternal_))) };
 482 }
 483
 484 static inline bool gmx_simdcall anyTrue(SimdDBool a)
 485 {
 486     return vec_any_ne(reinterpret_cast<__vector vsxBool int>(a.simdInternal_),
 487                       reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 488 }
 489
 490 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
 491 {
 492     return { vec_and(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 493 }
 494
 495 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
 496 {
 497     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 498 }
 499
 500 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 501 {
 502     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 503 }
 504
 505 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
 506 {
 507     return { vec_and(a.simdInternal_, b.simdInternal_) };
 508 }
 509
 510 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
 511 {
 512     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 513 }
 514
 515 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
 516 {
 517     return { vec_or(a.simdInternal_, b.simdInternal_) };
 518 }
 519
 520 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
 521 {
 522     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 523 }
 524
 525 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
 526 {
 527     return { vec_add(a.simdInternal_, b.simdInternal_) };
 528 }
 529
 530 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
 531 {
 532     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 533 }
 534
 535 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
 536 {
 537     return { a.simdInternal_ * b.simdInternal_ };
 538 }
 539
 540 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
 541 {
 542     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 543 }
 544
 545 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
 546 {
 547     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 548 }
 549
 550 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
 551 {
 552     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 553 }
 554
 555 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
 556 {
 557     return { vec_and(a.simdInternal_, b.simdInternal_) };
 558 }
 559
 560 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
 561 {
 562     return { vec_or(a.simdInternal_, b.simdInternal_) };
 563 }
 564
 565 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
 566 {
 567     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 568 }
 569
 570 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
 571 {
 572     return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 573 }
 574
 575 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
 576 {
 577     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 578 }
 579
 580 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
 581 {
 582     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 583 }
 584
 585 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
 586 {
 587 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 588     // gcc up to at least version 6.1 is missing intrinsics for converting double to/from int - use inline asm
 589     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 590     __vector double              ix;
 591
 592     __asm__("xvcvdpsxws %x0,%x1" : "=wa"(ix) : "wd"(a.simdInternal_));
 593
 594     return { reinterpret_cast<__vector signed int>(vec_perm(ix, ix, perm)) };
 595 #else
 596     return { vec_cts(a.simdInternal_, 0) };
 597 #endif
 598 }
 599
 600 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
 601 {
 602     return cvttR2I(round(a));
 603 }
 604
 605 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
 606 {
 607 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 608     // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
 609     __vector double x;
 610 #    ifndef __BIG_ENDIAN__
 611     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 612     a.simdInternal_                   = vec_perm(a.simdInternal_, a.simdInternal_, perm);
 613 #    endif
 614
 615     __asm__("xvcvsxwdp %x0,%x1" : "=wd"(x) : "wa"(a.simdInternal_));
 616
 617     return { x };
 618 #else
 619     return { vec_ctd(a.simdInternal_, 0) };
 620 #endif
 621 }
 622
 623 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
 624 {
 625     return { reinterpret_cast<__vector vsxBool int>(a.simdInternal_) };
 626 }
 627
 628 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
 629 {
 630     return { reinterpret_cast<__vector vsxBool long long>(a.simdInternal_) };
 631 }
 632
 633 static inline void gmx_simdcall cvtF2DD(SimdFloat f, SimdDouble* d0, SimdDouble* d1)
 634 {
 635     __vector float fA, fB;
 636     fA = vec_mergeh(f.simdInternal_, f.simdInternal_); /* 0011 */
 637     fB = vec_mergel(f.simdInternal_, f.simdInternal_); /* 2233 */
 638 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 639     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 640     __asm__("xvcvspdp %x0,%x1" : "=wd"(d0->simdInternal_) : "wf"(fA));
 641     __asm__("xvcvspdp %x0,%x1" : "=wd"(d1->simdInternal_) : "wf"(fB));
 642 #else
 643     d0->simdInternal_ = vec_cvf(fA); /* 01 */
 644     d1->simdInternal_ = vec_cvf(fB); /* 23 */
 645 #endif
 646 }
 647
 648 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble d0, SimdDouble d1)
 649 {
 650     __vector float fA, fB, fC, fD, fE;
 651 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 652     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 653     __asm__("xvcvdpsp %x0,%x1" : "=wf"(fA) : "wd"(d0.simdInternal_));
 654     __asm__("xvcvdpsp %x0,%x1" : "=wf"(fB) : "wd"(d1.simdInternal_));
 655 #else
 656     fA = vec_cvf(d0.simdInternal_);  /* 0x1x */
 657     fB = vec_cvf(d1.simdInternal_);  /* 2x3x */
 658 #endif
 659     fC = vec_mergeh(fA, fB); /* 02xx */
 660     fD = vec_mergel(fA, fB); /* 13xx */
 661     fE = vec_mergeh(fC, fD); /* 0123 */
 662     return { fE };
 663 }
 664
 665 static inline SimdDouble gmx_simdcall copysign(SimdDouble x, SimdDouble y)
 666 {
 667 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 668     __vector double res;
 669     __asm__("xvcpsgndp %x0,%x1,%x2" : "=wd"(res) : "wd"(y.simdInternal_), "wd"(x.simdInternal_));
 670     return { res };
 671 #else
 672     return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
 673 #endif
 674 }
 675
 676 } // namespace gmx
 677
 678 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H