/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H

#include "config.h"

#include <cstdint>

#include "gromacs/math/utilities.h"
#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vmx_definitions.h"
namespace gmx
{
class SimdFloat
{
    public:
        SimdFloat() {}

        SimdFloat(float f)
        {
            __vector unsigned char perm;

            simdInternal_ = vec_lde(0, const_cast<float *>(&f));
            perm          = vec_lvsl(0, const_cast<float *>(&f));
            simdInternal_ = vec_perm(simdInternal_, simdInternal_, perm);
            simdInternal_ = vec_splat(simdInternal_, 0);
        }

        // Internal utility constructor to simplify return statements
        SimdFloat(__vector float simd) : simdInternal_(simd) {}

        __vector float simdInternal_;
};
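// Note that the broadcast constructors above and below cannot simply splat an
// unaligned scalar, since plain VMX only provides the element load vec_lde().
// The idiom is: vec_lde() loads the scalar into whichever lane matches its
// address, vec_lvsl()/vec_perm() rotate that lane into position 0, and
// vec_splat(..., 0) then copies it to all four lanes.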
class SimdFInt32
{
    public:
        SimdFInt32() {}

        SimdFInt32(std::int32_t i)
        {
            __vector unsigned char perm;

            simdInternal_ = vec_lde(0, const_cast<int *>(&i));
            perm          = vec_lvsl(0, const_cast<int *>(&i));
            simdInternal_ = vec_perm(simdInternal_, simdInternal_, perm);
            simdInternal_ = vec_splat(simdInternal_, 0);
        }

        // Internal utility constructor to simplify return statements
        SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}

        __vector signed int simdInternal_;
};
class SimdFBool
{
    public:
        SimdFBool() {}

        SimdFBool(bool b) : simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splat_u32(b ? 0xFFFFFFFF : 0))) {}

        // Internal utility constructor to simplify return statements
        SimdFBool(__vector vmxBool int simd) : simdInternal_(simd) {}

        __vector vmxBool int simdInternal_;
};
class SimdFIBool
{
    public:
        SimdFIBool() {}

        SimdFIBool(bool b) : simdInternal_(reinterpret_cast<__vector vmxBool int>(vec_splat_u32(b ? 0xFFFFFFFF : 0))) {}

        // Internal utility constructor to simplify return statements
        SimdFIBool(__vector vmxBool int simd) : simdInternal_(simd) {}

        __vector vmxBool int simdInternal_;
};
static inline SimdFloat gmx_simdcall
simdLoad(const float *m, SimdFloatTag = {})
{
    return {
        vec_ld(0, const_cast<float *>(m))
    };
}

static inline void gmx_simdcall
store(float *m, SimdFloat a)
{
    vec_st(a.simdInternal_, 0, const_cast<float *>(m));
}

static inline SimdFloat gmx_simdcall
setZeroF()
{
    return {
        reinterpret_cast<__vector float>(vec_splat_u32(0))
    };
}

static inline SimdFInt32 gmx_simdcall
simdLoad(const std::int32_t * m, SimdFInt32Tag)
{
    return {
        vec_ld(0, const_cast<int *>(m))
    };
}

static inline void gmx_simdcall
store(std::int32_t * m, SimdFInt32 a)
{
    vec_st(a.simdInternal_, 0, const_cast<int *>(m));
}

static inline SimdFInt32 gmx_simdcall
setZeroFI()
{
    return {
        vec_splat_s32(0)
    };
}
static inline SimdFloat gmx_simdcall
operator&(SimdFloat a, SimdFloat b)
{
    return {
        vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
andNot(SimdFloat a, SimdFloat b)
{
    return {
        vec_andc(b.simdInternal_, a.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator|(SimdFloat a, SimdFloat b)
{
    return {
        vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator^(SimdFloat a, SimdFloat b)
{
    return {
        vec_xor(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator+(SimdFloat a, SimdFloat b)
{
    return {
        vec_add(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
operator-(SimdFloat a, SimdFloat b)
{
    return {
        vec_sub(a.simdInternal_, b.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
operator-(SimdFloat x)
{
    return {
        vec_xor(x.simdInternal_, reinterpret_cast<__vector float>(vec_sl(vec_splat_u32(-1), vec_splat_u32(-1))))
    };
}
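// Note that the sign mask above is generated without touching memory:
// vec_splat_u32(-1) yields 0xFFFFFFFF in every lane, and vec_sl() interprets
// the shift count 0xFFFFFFFF modulo 32, i.e. as 31, so the constant becomes
// 0xFFFFFFFF << 31 == 0x80000000. XOR-ing with it flips only the sign bits.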
static inline SimdFloat gmx_simdcall
operator*(SimdFloat a, SimdFloat b)
{
    return {
        vec_madd(a.simdInternal_, b.simdInternal_, reinterpret_cast<__vector float>(vec_splat_u32(0)))
    };
}

static inline SimdFloat gmx_simdcall
fma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
        vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
        vec_madd(a.simdInternal_, b.simdInternal_, -c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnma(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
        vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
fnms(SimdFloat a, SimdFloat b, SimdFloat c)
{
    return {
        -vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
rsqrt(SimdFloat x)
{
    return {
        vec_rsqrte(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
rcp(SimdFloat x)
{
    return {
        vec_re(x.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
{
    return {
        vec_add(a.simdInternal_, vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)))
    };
}

static inline SimdFloat gmx_simdcall
maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
{
    SimdFloat prod = a * b;

    return {
        vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
{
    SimdFloat prod = fma(a, b, c);

    return {
        vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
    };
}
static inline SimdFloat gmx_simdcall
maskzRsqrt(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    SimdFloat one(1.0F);
    x.simdInternal_ = vec_sel(one.simdInternal_, x.simdInternal_, m.simdInternal_);
#endif
    return {
        vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
maskzRcp(SimdFloat x, SimdFBool m)
{
#ifndef NDEBUG
    SimdFloat one(1.0F);
    x.simdInternal_ = vec_sel(one.simdInternal_, x.simdInternal_, m.simdInternal_);
#endif
    return {
        vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_))
    };
}
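// Note that in the two maskz routines above the #ifndef NDEBUG block likely
// exists so that debug builds do not feed zeros (or other invalid lanes that
// the mask will discard anyway) to vec_rsqrte()/vec_re(); selecting 1.0 for
// the masked-out lanes keeps those estimates well-defined.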
static inline SimdFloat gmx_simdcall
abs(SimdFloat x)
{
    return {
        vec_abs(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
max(SimdFloat a, SimdFloat b)
{
    return {
        vec_max(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
min(SimdFloat a, SimdFloat b)
{
    return {
        vec_min(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
round(SimdFloat x)
{
    return {
        vec_round(x.simdInternal_)
    };
}

static inline SimdFloat gmx_simdcall
trunc(SimdFloat x)
{
    return {
        vec_trunc(x.simdInternal_)
    };
}
static inline SimdFloat gmx_simdcall
frexp(SimdFloat value, SimdFInt32 * exponent)
{
    // Generate constants without memory operations
    const __vector signed int exponentMask = vec_sl(vec_add(vec_splat_s32(15), vec_sl(vec_splat_s32(15), vec_splat_u32(4))),
                                                    vec_add(vec_splat_u32(15), vec_splat_u32(8)));                           // 0x7F800000
    const __vector signed int exponentBias = vec_sub(vec_sl(vec_splat_s32(1), vec_splat_u32(7)), vec_splat_s32(2));          // 126
    const SimdFloat           half(0.5F);
    __vector signed int       iExponent;

    iExponent               = vec_and(reinterpret_cast<__vector signed int>(value.simdInternal_), exponentMask);
    iExponent               = vec_sr(iExponent, vec_add(vec_splat_u32(15), vec_splat_u32(8)));
    iExponent               = vec_sub(iExponent, exponentBias);
    exponent->simdInternal_ = iExponent;

    return {
        vec_or(vec_andc(value.simdInternal_, reinterpret_cast<__vector float>(exponentMask)), half.simdInternal_)
    };
}
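// Note how the constants in frexp() above are built purely from 5-bit splat
// literals to avoid memory loads:
//   exponentMask = (15 + (15 << 4)) << (15 + 8) = 255 << 23 = 0x7F800000
//   exponentBias = (1 << 7) - 2 = 126
// Clearing the exponent field and OR-ing in the bits of 0.5 leaves the
// returned fraction in [0.5, 1), e.g. frexp(6.0) gives 0.75 and exponent 3.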
template <MathOptimization opt = MathOptimization::Safe>
static inline SimdFloat gmx_simdcall
ldexp(SimdFloat value, SimdFInt32 exponent)
{
    const __vector signed int exponentBias = vec_sub(vec_sl(vec_splat_s32(1), vec_splat_u32(7)), vec_splat_s32(1)); // 127
    __vector signed int       iExponent;

    iExponent = vec_add(exponent.simdInternal_, exponentBias);

    if (opt == MathOptimization::Safe)
    {
        // Make sure biased argument is not negative
        iExponent = vec_max(iExponent, vec_splat_s32(0));
    }

    iExponent = vec_sl(iExponent, vec_add(vec_splat_u32(15), vec_splat_u32(8)));

    return {
        vec_madd(value.simdInternal_, reinterpret_cast<__vector float>(iExponent), reinterpret_cast<__vector float>(vec_splat_u32(0)))
    };
}
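// Note that ldexp() above constructs the float 2^exponent directly from its
// bit pattern: the biased exponent (exponent + 127) is shifted into bits
// 30-23, which with a zero mantissa encodes exactly 2^exponent, and the final
// vec_madd() computes value * 2^exponent + 0. For example, ldexp(0.75, 3)
// multiplies 0.75 by the bit pattern 130 << 23, i.e. by 8.0, giving 6.0.
// The vec_max() clamp in the Safe path keeps strongly negative exponents from
// shifting a negative biased value into the sign bit.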
static inline float gmx_simdcall
reduce(SimdFloat a)
{
    __vector float c = a.simdInternal_;
    float          res;

    // calculate sum
    c = vec_add(c, vec_sld(c, c, 8));
    c = vec_add(c, vec_sld(c, c, 4));
    vec_ste(c, 0, &res);
    return res;
}
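// Note how reduce() above forms the horizontal sum by rotating the register
// with vec_sld() and adding it onto itself: after the 8-byte rotate every
// lane holds the sum of two elements, after the 4-byte rotate every lane
// holds x0+x1+x2+x3, and vec_ste() then stores a single element of that
// all-equal vector, since plain VMX has no direct vector-to-scalar move.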
static inline SimdFBool gmx_simdcall
operator==(SimdFloat a, SimdFloat b)
{
    return {
        vec_cmpeq(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator!=(SimdFloat a, SimdFloat b)
{
    return {
        vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
               vec_cmplt(a.simdInternal_, b.simdInternal_))
    };
}

static inline SimdFBool gmx_simdcall
operator<(SimdFloat a, SimdFloat b)
{
    return {
        vec_cmplt(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator<=(SimdFloat a, SimdFloat b)
{
    return {
        vec_cmple(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
testBits(SimdFloat a)
{
    return {
        vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splat_u32(0))
    };
}
static inline SimdFBool gmx_simdcall
operator&&(SimdFBool a, SimdFBool b)
{
    return {
        vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFBool gmx_simdcall
operator||(SimdFBool a, SimdFBool b)
{
    return {
        vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline bool gmx_simdcall
anyTrue(SimdFBool a)
{
    return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vmxBool int>(vec_splat_u32(0)));
}
static inline SimdFloat gmx_simdcall
selectByMask(SimdFloat a, SimdFBool m)
{
    return {
        vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
selectByNotMask(SimdFloat a, SimdFBool m)
{
    return {
        vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))
    };
}

static inline SimdFloat gmx_simdcall
blend(SimdFloat a, SimdFloat b, SimdFBool sel)
{
    return {
        vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
    };
}
static inline SimdFInt32 gmx_simdcall
operator&(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFInt32 gmx_simdcall
andNot(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_andc(b.simdInternal_, a.simdInternal_)
    };
}

static inline SimdFInt32 gmx_simdcall
operator|(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFInt32 gmx_simdcall
operator^(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_xor(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFInt32 gmx_simdcall
operator+(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_add(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFInt32 gmx_simdcall
operator-(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_sub(a.simdInternal_, b.simdInternal_)
    };
}
static inline SimdFInt32 gmx_simdcall
operator*(SimdFInt32 a, SimdFInt32 b)
{
    return {
        a.simdInternal_ * b.simdInternal_
    };
}
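// Note that the integer multiplication above appears to rely on the
// compiler's vector-extension operator* rather than on a VMX intrinsic,
// since plain VMX/AltiVec lacks a full 32-bit element multiply (only the
// 8- and 16-bit vec_mule()/vec_mulo() variants are available).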
static inline SimdFIBool gmx_simdcall
operator==(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_cmpeq(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFIBool gmx_simdcall
testBits(SimdFInt32 a)
{
    return {
        vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splat_u32(0))
    };
}

static inline SimdFIBool gmx_simdcall
operator<(SimdFInt32 a, SimdFInt32 b)
{
    return {
        vec_cmplt(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFIBool gmx_simdcall
operator&&(SimdFIBool a, SimdFIBool b)
{
    return {
        vec_and(a.simdInternal_, b.simdInternal_)
    };
}

static inline SimdFIBool gmx_simdcall
operator||(SimdFIBool a, SimdFIBool b)
{
    return {
        vec_or(a.simdInternal_, b.simdInternal_)
    };
}

static inline bool gmx_simdcall
anyTrue(SimdFIBool a)
{
    return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vmxBool int>(vec_splat_u32(0)));
}
static inline SimdFInt32 gmx_simdcall
selectByMask(SimdFInt32 a, SimdFIBool m)
{
    return {
        vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
    };
}

static inline SimdFInt32 gmx_simdcall
selectByNotMask(SimdFInt32 a, SimdFIBool m)
{
    return {
        vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
    };
}

static inline SimdFInt32 gmx_simdcall
blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
{
    return {
        vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
    };
}
static inline SimdFInt32 gmx_simdcall
cvtR2I(SimdFloat a)
{
    return {
        vec_cts(vec_round(a.simdInternal_), 0)
    };
}

static inline SimdFInt32 gmx_simdcall
cvttR2I(SimdFloat a)
{
    return {
        vec_cts(a.simdInternal_, 0)
    };
}

static inline SimdFloat gmx_simdcall
cvtI2R(SimdFInt32 a)
{
    return {
        vec_ctf(a.simdInternal_, 0)
    };
}

static inline SimdFIBool gmx_simdcall
cvtB2IB(SimdFBool a)
{
    return {
        a.simdInternal_
    };
}

static inline SimdFBool gmx_simdcall
cvtIB2B(SimdFIBool a)
{
    return {
        a.simdInternal_
    };
}
}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VMX_SIMD_FLOAT_H