src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines low-level functions necessary for
  41  * computing energies and forces for listed interactions.
  42  *
  43  * \author Mark Abraham <mark.j.abraham@gmail.com>
  44  *
  45  * \ingroup module_listed_forces
  46  */
  47 #include "gmxpre.h"
  48
  49 #include "bonded.h"
  50
  51 #include "config.h"
  52
  53 #include <cassert>
  54 #include <cmath>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/gmxlib/nrnb.h"
  59 #include "gromacs/listed_forces/disre.h"
  60 #include "gromacs/listed_forces/orires.h"
  61 #include "gromacs/math/functions.h"
  62 #include "gromacs/math/units.h"
  63 #include "gromacs/math/utilities.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/mdatom.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/mshift.h"
  69 #include "gromacs/pbcutil/pbc.h"
  70 #include "gromacs/pbcutil/pbc_simd.h"
  71 #include "gromacs/simd/simd.h"
  72 #include "gromacs/simd/simd_math.h"
  73 #include "gromacs/simd/vector_operations.h"
  74 #include "gromacs/utility/basedefinitions.h"
  75 #include "gromacs/utility/enumerationhelpers.h"
  76 #include "gromacs/utility/fatalerror.h"
  77 #include "gromacs/utility/real.h"
  78 #include "gromacs/utility/smalloc.h"
  79
  80 #include "listed_internal.h"
  81 #include "restcbt.h"
  82
  83 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  84
  85 namespace
  86 {
  87
  88 //! Type of CPU function to compute a bonded interaction.
  89 using BondedFunction = real (*)(int              nbonds,
  90                                 const t_iatom    iatoms[],
  91                                 const t_iparams  iparams[],
  92                                 const rvec       x[],
  93                                 rvec4            f[],
  94                                 rvec             fshift[],
  95                                 const t_pbc*     pbc,
  96                                 const t_graph*   g,
  97                                 real             lambda,
  98                                 real*            dvdlambda,
  99                                 const t_mdatoms* md,
 100                                 t_fcdata*        fcd,
 101                                 int*             ddgatindex);
 102
 103 /*! \brief Mysterious CMAP coefficient matrix */
 104 const int cmap_coeff_matrix[] = {
 105     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 106     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 107     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 108     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 109     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 110     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 111     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 112     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 113     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 114     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 115     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 116 };
 117
 118
 119 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 120  *
 121  * \todo This kind of code appears in many places. Consolidate it */
 122 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 123 {
 124     if (pbc)
 125     {
 126         return pbc_dx_aiuc(pbc, xi, xj, dx);
 127     }
 128     else
 129     {
 130         rvec_sub(xi, xj, dx);
 131         return CENTRAL;
 132     }
 133 }
 134
 135 } // namespace
 136
 137 //! \cond
 138 real bond_angle(const rvec   xi,
 139                 const rvec   xj,
 140                 const rvec   xk,
 141                 const t_pbc* pbc,
 142                 rvec         r_ij,
 143                 rvec         r_kj,
 144                 real*        costh,
 145                 int*         t1,
 146                 int*         t2)
 147 /* Return value is the angle between the bonds i-j and j-k */
 148 {
 149     /* 41 FLOPS */
 150     real th;
 151
 152     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 153     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 154
 155     *costh = cos_angle(r_ij, r_kj); /* 25               */
 156     th     = std::acos(*costh);     /* 10               */
 157     /* 41 TOTAL */
 158     return th;
 159 }
 160
 161 real dih_angle(const rvec   xi,
 162                const rvec   xj,
 163                const rvec   xk,
 164                const rvec   xl,
 165                const t_pbc* pbc,
 166                rvec         r_ij,
 167                rvec         r_kj,
 168                rvec         r_kl,
 169                rvec         m,
 170                rvec         n,
 171                int*         t1,
 172                int*         t2,
 173                int*         t3)
 174 {
 175     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 176     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 177     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 178
 179     cprod(r_ij, r_kj, m);        /*  9        */
 180     cprod(r_kj, r_kl, n);        /*  9          */
 181     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 182     real ipr  = iprod(r_ij, n);  /*  5        */
 183     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 184     phi       = sign * phi; /*  1               */
 185     /* 82 TOTAL */
 186     return phi;
 187 }
 188 //! \endcond
 189
 190 void make_dp_periodic(real* dp) /* 1 flop? */
 191 {
 192     /* dp cannot be outside (-pi,pi) */
 193     if (*dp >= M_PI)
 194     {
 195         *dp -= 2 * M_PI;
 196     }
 197     else if (*dp < -M_PI)
 198     {
 199         *dp += 2 * M_PI;
 200     }
 201 }
 202
 203 namespace
 204 {
 205
 206 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 207  *
 208  * When \p g==nullptr, \p shiftIndex is used as the periodic shift.
 209  * When \p g!=nullptr, the graph is used to compute the periodic shift.
 210  */
 211 template<BondedKernelFlavor flavor>
 212 inline void spreadBondForces(const real     bondForce,
 213                              const rvec     dx,
 214                              const int      ai,
 215                              const int      aj,
 216                              rvec4*         f,
 217                              int            shiftIndex,
 218                              const t_graph* g,
 219                              rvec*          fshift)
 220 {
 221     if (computeVirial(flavor) && g)
 222     {
 223         ivec dt;
 224         ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 225         shiftIndex = IVEC2IS(dt);
 226     }
 227
 228     for (int m = 0; m < DIM; m++) /*  15          */
 229     {
 230         const real fij = bondForce * dx[m];
 231         f[ai][m] += fij;
 232         f[aj][m] -= fij;
 233         if (computeVirial(flavor))
 234         {
 235             fshift[shiftIndex][m] += fij;
 236             fshift[CENTRAL][m] -= fij;
 237         }
 238     }
 239 }
 240
 241 /*! \brief Morse potential bond
 242  *
 243  * By Frank Everdij. Three parameters needed:
 244  *
 245  * b0 = equilibrium distance in nm
 246  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 247  * cb = well depth in kJ/mol
 248  *
 249  * Note: the potential is referenced to be +cb at infinite separation
 250  *       and zero at the equilibrium distance!
 251  */
 252 template<BondedKernelFlavor flavor>
 253 real morse_bonds(int             nbonds,
 254                  const t_iatom   forceatoms[],
 255                  const t_iparams forceparams[],
 256                  const rvec      x[],
 257                  rvec4           f[],
 258                  rvec            fshift[],
 259                  const t_pbc*    pbc,
 260                  const t_graph*  g,
 261                  real            lambda,
 262                  real*           dvdlambda,
 263                  const t_mdatoms gmx_unused* md,
 264                  t_fcdata gmx_unused* fcd,
 265                  int gmx_unused* global_atom_index)
 266 {
 267     const real one = 1.0;
 268     const real two = 2.0;
 269     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 270     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 271     rvec       dx;
 272     int        i, ki, type, ai, aj;
 273
 274     vtot = 0.0;
 275     for (i = 0; (i < nbonds);)
 276     {
 277         type = forceatoms[i++];
 278         ai   = forceatoms[i++];
 279         aj   = forceatoms[i++];
 280
 281         b0A = forceparams[type].morse.b0A;
 282         beA = forceparams[type].morse.betaA;
 283         cbA = forceparams[type].morse.cbA;
 284
 285         b0B = forceparams[type].morse.b0B;
 286         beB = forceparams[type].morse.betaB;
 287         cbB = forceparams[type].morse.cbB;
 288
 289         L1 = one - lambda;            /* 1 */
 290         b0 = L1 * b0A + lambda * b0B; /* 3 */
 291         be = L1 * beA + lambda * beB; /* 3 */
 292         cb = L1 * cbA + lambda * cbB; /* 3 */
 293
 294         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 295         dr2  = iprod(dx, dx);                       /*   5          */
 296         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 297         temp = std::exp(-be * (dr - b0));           /*  12          */
 298
 299         if (temp == one)
 300         {
 301             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 302             *dvdlambda += cbB - cbA;
 303             continue;
 304         }
 305
 306         omtemp   = one - temp;                                      /*   1          */
 307         cbomtemp = cb * omtemp;                                     /*   1          */
 308         vbond    = cbomtemp * omtemp;                               /*   1          */
 309         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 310         vtot += vbond;                                              /*   1          */
 311
 312         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 313                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 314
 315         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 316     }                                                                  /*  83 TOTAL    */
 317     return vtot;
 318 }
 319
 320 //! \cond
 321 template<BondedKernelFlavor flavor>
 322 real cubic_bonds(int             nbonds,
 323                  const t_iatom   forceatoms[],
 324                  const t_iparams forceparams[],
 325                  const rvec      x[],
 326                  rvec4           f[],
 327                  rvec            fshift[],
 328                  const t_pbc*    pbc,
 329                  const t_graph*  g,
 330                  real gmx_unused lambda,
 331                  real gmx_unused* dvdlambda,
 332                  const t_mdatoms gmx_unused* md,
 333                  t_fcdata gmx_unused* fcd,
 334                  int gmx_unused* global_atom_index)
 335 {
 336     const real three = 3.0;
 337     const real two   = 2.0;
 338     real       kb, b0, kcub;
 339     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 340     rvec       dx;
 341     int        i, ki, type, ai, aj;
 342
 343     vtot = 0.0;
 344     for (i = 0; (i < nbonds);)
 345     {
 346         type = forceatoms[i++];
 347         ai   = forceatoms[i++];
 348         aj   = forceatoms[i++];
 349
 350         b0   = forceparams[type].cubic.b0;
 351         kb   = forceparams[type].cubic.kb;
 352         kcub = forceparams[type].cubic.kcub;
 353
 354         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 355         dr2 = iprod(dx, dx);                       /*   5          */
 356
 357         if (dr2 == 0.0)
 358         {
 359             continue;
 360         }
 361
 362         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 363         dist   = dr - b0;
 364         kdist  = kb * dist;
 365         kdist2 = kdist * dist;
 366
 367         vbond = kdist2 + kcub * kdist2 * dist;
 368         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 369
 370         vtot += vbond; /* 21 */
 371
 372         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 373     }                                                                  /*  54 TOTAL    */
 374     return vtot;
 375 }
 376
 377 template<BondedKernelFlavor flavor>
 378 real FENE_bonds(int             nbonds,
 379                 const t_iatom   forceatoms[],
 380                 const t_iparams forceparams[],
 381                 const rvec      x[],
 382                 rvec4           f[],
 383                 rvec            fshift[],
 384                 const t_pbc*    pbc,
 385                 const t_graph*  g,
 386                 real gmx_unused lambda,
 387                 real gmx_unused* dvdlambda,
 388                 const t_mdatoms gmx_unused* md,
 389                 t_fcdata gmx_unused* fcd,
 390                 int*                 global_atom_index)
 391 {
 392     const real half = 0.5;
 393     const real one  = 1.0;
 394     real       bm, kb;
 395     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 396     rvec       dx;
 397     int        i, ki, type, ai, aj;
 398
 399     vtot = 0.0;
 400     for (i = 0; (i < nbonds);)
 401     {
 402         type = forceatoms[i++];
 403         ai   = forceatoms[i++];
 404         aj   = forceatoms[i++];
 405
 406         bm = forceparams[type].fene.bm;
 407         kb = forceparams[type].fene.kb;
 408
 409         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 410         dr2 = iprod(dx, dx);                       /*   5          */
 411
 412         if (dr2 == 0.0)
 413         {
 414             continue;
 415         }
 416
 417         bm2 = bm * bm;
 418
 419         if (dr2 >= bm2)
 420         {
 421             gmx_fatal(FARGS, "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d", dr2, bm2,
 422                       glatnr(global_atom_index, ai), glatnr(global_atom_index, aj));
 423         }
 424
 425         omdr2obm2 = one - dr2 / bm2;
 426
 427         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 428         fbond = -kb / omdr2obm2;
 429
 430         vtot += vbond; /* 35 */
 431
 432         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 433     }                                                                  /*  58 TOTAL    */
 434     return vtot;
 435 }
 436
 437 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 438 {
 439     const real half = 0.5;
 440     real       L1, kk, x0, dx, dx2;
 441     real       v, f, dvdlambda;
 442
 443     L1 = 1.0 - lambda;
 444     kk = L1 * kA + lambda * kB;
 445     x0 = L1 * xA + lambda * xB;
 446
 447     dx  = x - x0;
 448     dx2 = dx * dx;
 449
 450     f         = -kk * dx;
 451     v         = half * kk * dx2;
 452     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 453
 454     *F = f;
 455     *V = v;
 456
 457     return dvdlambda;
 458
 459     /* That was 19 flops */
 460 }
 461
 462
 463 template<BondedKernelFlavor flavor>
 464 real bonds(int             nbonds,
 465            const t_iatom   forceatoms[],
 466            const t_iparams forceparams[],
 467            const rvec      x[],
 468            rvec4           f[],
 469            rvec            fshift[],
 470            const t_pbc*    pbc,
 471            const t_graph*  g,
 472            real            lambda,
 473            real*           dvdlambda,
 474            const t_mdatoms gmx_unused* md,
 475            t_fcdata gmx_unused* fcd,
 476            int gmx_unused* global_atom_index)
 477 {
 478     int  i, ki, ai, aj, type;
 479     real dr, dr2, fbond, vbond, vtot;
 480     rvec dx;
 481
 482     vtot = 0.0;
 483     for (i = 0; (i < nbonds);)
 484     {
 485         type = forceatoms[i++];
 486         ai   = forceatoms[i++];
 487         aj   = forceatoms[i++];
 488
 489         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 490         dr2 = iprod(dx, dx);                       /*   5               */
 491         dr  = std::sqrt(dr2);                      /*  10               */
 492
 493         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 494                                forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr,
 495                                lambda, &vbond, &fbond); /*  19  */
 496
 497         if (dr2 == 0.0)
 498         {
 499             continue;
 500         }
 501
 502
 503         vtot += vbond;              /* 1*/
 504         fbond *= gmx::invsqrt(dr2); /*   6              */
 505
 506         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 507     }                                                                  /* 59 TOTAL      */
 508     return vtot;
 509 }
 510
 511 template<BondedKernelFlavor flavor>
 512 real restraint_bonds(int             nbonds,
 513                      const t_iatom   forceatoms[],
 514                      const t_iparams forceparams[],
 515                      const rvec      x[],
 516                      rvec4           f[],
 517                      rvec            fshift[],
 518                      const t_pbc*    pbc,
 519                      const t_graph*  g,
 520                      real            lambda,
 521                      real*           dvdlambda,
 522                      const t_mdatoms gmx_unused* md,
 523                      t_fcdata gmx_unused* fcd,
 524                      int gmx_unused* global_atom_index)
 525 {
 526     int  i, ki, ai, aj, type;
 527     real dr, dr2, fbond, vbond, vtot;
 528     real L1;
 529     real low, dlow, up1, dup1, up2, dup2, k, dk;
 530     real drh, drh2;
 531     rvec dx;
 532
 533     L1 = 1.0 - lambda;
 534
 535     vtot = 0.0;
 536     for (i = 0; (i < nbonds);)
 537     {
 538         type = forceatoms[i++];
 539         ai   = forceatoms[i++];
 540         aj   = forceatoms[i++];
 541
 542         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 543         dr2 = iprod(dx, dx);                       /*   5               */
 544         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 545
 546         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 547         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 548         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 549         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 550         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 551         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 552         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 553         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 554         /* 24 */
 555
 556         if (dr < low)
 557         {
 558             drh   = dr - low;
 559             drh2  = drh * drh;
 560             vbond = 0.5 * k * drh2;
 561             fbond = -k * drh;
 562             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 563         } /* 11 */
 564         else if (dr <= up1)
 565         {
 566             vbond = 0;
 567             fbond = 0;
 568         }
 569         else if (dr <= up2)
 570         {
 571             drh   = dr - up1;
 572             drh2  = drh * drh;
 573             vbond = 0.5 * k * drh2;
 574             fbond = -k * drh;
 575             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 576         } /* 11 */
 577         else
 578         {
 579             drh   = dr - up2;
 580             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 581             fbond = -k * (up2 - up1);
 582             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 583                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 584         }
 585
 586         if (dr2 == 0.0)
 587         {
 588             continue;
 589         }
 590
 591         vtot += vbond;              /* 1*/
 592         fbond *= gmx::invsqrt(dr2); /*   6              */
 593
 594         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 595     }                                                                  /* 59 TOTAL      */
 596
 597     return vtot;
 598 }
 599
 600 template<BondedKernelFlavor flavor>
 601 real polarize(int              nbonds,
 602               const t_iatom    forceatoms[],
 603               const t_iparams  forceparams[],
 604               const rvec       x[],
 605               rvec4            f[],
 606               rvec             fshift[],
 607               const t_pbc*     pbc,
 608               const t_graph*   g,
 609               real             lambda,
 610               real*            dvdlambda,
 611               const t_mdatoms* md,
 612               t_fcdata gmx_unused* fcd,
 613               int gmx_unused* global_atom_index)
 614 {
 615     int  i, ki, ai, aj, type;
 616     real dr, dr2, fbond, vbond, vtot, ksh;
 617     rvec dx;
 618
 619     vtot = 0.0;
 620     for (i = 0; (i < nbonds);)
 621     {
 622         type = forceatoms[i++];
 623         ai   = forceatoms[i++];
 624         aj   = forceatoms[i++];
 625         ksh  = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].polarize.alpha;
 626
 627         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 628         dr2 = iprod(dx, dx);                       /*   5               */
 629         dr  = std::sqrt(dr2);                      /*  10               */
 630
 631         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 632
 633         if (dr2 == 0.0)
 634         {
 635             continue;
 636         }
 637
 638         vtot += vbond;              /* 1*/
 639         fbond *= gmx::invsqrt(dr2); /*   6              */
 640
 641         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 642     }                                                                  /* 59 TOTAL      */
 643     return vtot;
 644 }
 645
 646 template<BondedKernelFlavor flavor>
 647 real anharm_polarize(int              nbonds,
 648                      const t_iatom    forceatoms[],
 649                      const t_iparams  forceparams[],
 650                      const rvec       x[],
 651                      rvec4            f[],
 652                      rvec             fshift[],
 653                      const t_pbc*     pbc,
 654                      const t_graph*   g,
 655                      real             lambda,
 656                      real*            dvdlambda,
 657                      const t_mdatoms* md,
 658                      t_fcdata gmx_unused* fcd,
 659                      int gmx_unused* global_atom_index)
 660 {
 661     int  i, ki, ai, aj, type;
 662     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 663     rvec dx;
 664
 665     vtot = 0.0;
 666     for (i = 0; (i < nbonds);)
 667     {
 668         type = forceatoms[i++];
 669         ai   = forceatoms[i++];
 670         aj   = forceatoms[i++];
 671         ksh = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 672         khyp  = forceparams[type].anharm_polarize.khyp;
 673         drcut = forceparams[type].anharm_polarize.drcut;
 674
 675         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 676         dr2 = iprod(dx, dx);                       /*   5               */
 677         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 678
 679         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 680
 681         if (dr2 == 0.0)
 682         {
 683             continue;
 684         }
 685
 686         if (dr > drcut)
 687         {
 688             ddr  = dr - drcut;
 689             ddr3 = ddr * ddr * ddr;
 690             vbond += khyp * ddr * ddr3;
 691             fbond -= 4 * khyp * ddr3;
 692         }
 693         fbond *= gmx::invsqrt(dr2); /*   6              */
 694         vtot += vbond;              /* 1*/
 695
 696         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 697     }                                                                  /* 72 TOTAL      */
 698     return vtot;
 699 }
 700
 701 template<BondedKernelFlavor flavor>
 702 real water_pol(int             nbonds,
 703                const t_iatom   forceatoms[],
 704                const t_iparams forceparams[],
 705                const rvec      x[],
 706                rvec4           f[],
 707                rvec gmx_unused fshift[],
 708                const t_pbc gmx_unused* pbc,
 709                const t_graph gmx_unused* g,
 710                real gmx_unused lambda,
 711                real gmx_unused* dvdlambda,
 712                const t_mdatoms gmx_unused* md,
 713                t_fcdata gmx_unused* fcd,
 714                int gmx_unused* global_atom_index)
 715 {
 716     /* This routine implements anisotropic polarizibility for water, through
 717      * a shell connected to a dummy with spring constant that differ in the
 718      * three spatial dimensions in the molecular frame.
 719      */
 720     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 721     ivec dt;
 722     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 723     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 724
 725     vtot = 0.0;
 726     if (nbonds > 0)
 727     {
 728         type0  = forceatoms[0];
 729         aS     = forceatoms[5];
 730         qS     = md->chargeA[aS];
 731         kk[XX] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_x;
 732         kk[YY] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_y;
 733         kk[ZZ] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_z;
 734         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 735         for (i = 0; (i < nbonds); i += 6)
 736         {
 737             type = forceatoms[i];
 738             if (type != type0)
 739             {
 740                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0,
 741                           __FILE__, __LINE__);
 742             }
 743             aO  = forceatoms[i + 1];
 744             aH1 = forceatoms[i + 2];
 745             aH2 = forceatoms[i + 3];
 746             aD  = forceatoms[i + 4];
 747             aS  = forceatoms[i + 5];
 748
 749             /* Compute vectors describing the water frame */
 750             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 751             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 752             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 753             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 754             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 755             cprod(dOH1, dOH2, nW);
 756
 757             /* Compute inverse length of normal vector
 758              * (this one could be precomputed, but I'm too lazy now)
 759              */
 760             r_nW = gmx::invsqrt(iprod(nW, nW));
 761             /* This is for precision, but does not make a big difference,
 762              * it can go later.
 763              */
 764             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 765
 766             /* Normalize the vectors in the water frame */
 767             svmul(r_nW, nW, nW);
 768             svmul(r_HH, dHH, dHH);
 769             svmul(r_OD, dOD, dOD);
 770
 771             /* Compute displacement of shell along components of the vector */
 772             dx[ZZ] = iprod(dDS, dOD);
 773             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 774             for (m = 0; (m < DIM); m++)
 775             {
 776                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 777             }
 778
 779             /*dx[XX] = iprod(dDS,nW);
 780                dx[YY] = iprod(dDS,dHH);*/
 781             dx[XX] = iprod(proj, nW);
 782             for (m = 0; (m < DIM); m++)
 783             {
 784                 proj[m] -= dx[XX] * nW[m];
 785             }
 786             dx[YY] = iprod(proj, dHH);
 787             /* Now compute the forces and energy */
 788             kdx[XX] = kk[XX] * dx[XX];
 789             kdx[YY] = kk[YY] * dx[YY];
 790             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 791             vtot += iprod(dx, kdx);
 792
 793             if (computeVirial(flavor) && g)
 794             {
 795                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 796                 ki = IVEC2IS(dt);
 797             }
 798
 799             for (m = 0; (m < DIM); m++)
 800             {
 801                 /* This is a tensor operation but written out for speed */
 802                 tx  = nW[m] * kdx[XX];
 803                 ty  = dHH[m] * kdx[YY];
 804                 tz  = dOD[m] * kdx[ZZ];
 805                 fij = -tx - ty - tz;
 806                 f[aS][m] += fij;
 807                 f[aD][m] -= fij;
 808                 if (computeVirial(flavor))
 809                 {
 810                     fshift[ki][m] += fij;
 811                     fshift[CENTRAL][m] -= fij;
 812                 }
 813             }
 814         }
 815     }
 816     return 0.5 * vtot;
 817 }
 818
 819 template<BondedKernelFlavor flavor>
 820 static real
 821 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 822 {
 823     rvec r12;
 824     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 825     int  m, t;
 826
 827     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 828
 829     r12sq  = iprod(r12, r12);                                                     /*  5 */
 830     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 831     r12bar = afac / r12_1;                                                        /*  5 */
 832     v0     = qq * ONE_4PI_EPS0 * r12_1;                                           /*  2 */
 833     ebar   = std::exp(-r12bar);                                                   /*  5 */
 834     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 835     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 836
 837     for (m = 0; (m < DIM); m++)
 838     {
 839         fff = fscal * r12[m];
 840         fi[m] += fff;
 841         fj[m] -= fff;
 842         if (computeVirial(flavor))
 843         {
 844             fshift[t][m] += fff;
 845             fshift[CENTRAL][m] -= fff;
 846         }
 847     } /* 15 */
 848
 849     return v0 * v1; /* 1 */
 850     /* 54 */
 851 }
 852
 853 template<BondedKernelFlavor flavor>
 854 real thole_pol(int             nbonds,
 855                const t_iatom   forceatoms[],
 856                const t_iparams forceparams[],
 857                const rvec      x[],
 858                rvec4           f[],
 859                rvec            fshift[],
 860                const t_pbc*    pbc,
 861                const t_graph gmx_unused* g,
 862                real gmx_unused lambda,
 863                real gmx_unused* dvdlambda,
 864                const t_mdatoms* md,
 865                t_fcdata gmx_unused* fcd,
 866                int gmx_unused* global_atom_index)
 867 {
 868     /* Interaction between two pairs of particles with opposite charge */
 869     int  i, type, a1, da1, a2, da2;
 870     real q1, q2, qq, a, al1, al2, afac;
 871     real V = 0;
 872
 873     for (i = 0; (i < nbonds);)
 874     {
 875         type = forceatoms[i++];
 876         a1   = forceatoms[i++];
 877         da1  = forceatoms[i++];
 878         a2   = forceatoms[i++];
 879         da2  = forceatoms[i++];
 880         q1   = md->chargeA[da1];
 881         q2   = md->chargeA[da2];
 882         a    = forceparams[type].thole.a;
 883         al1  = forceparams[type].thole.alpha1;
 884         al2  = forceparams[type].thole.alpha2;
 885         qq   = q1 * q2;
 886         afac = a * gmx::invsixthroot(al1 * al2);
 887         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 888         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 889         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 890         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 891     }
 892     /* 290 flops */
 893     return V;
 894 }
 895
 896 // Avoid gcc 386 -O3 code generation bug in this function (see Redmine
 897 // #3205 for more information)
 898 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
 899 #    pragma GCC push_options
 900 #    pragma GCC optimize("O1")
 901 #    define avoid_gcc_i386_o3_code_generation_bug
 902 #endif
 903
 904 template<BondedKernelFlavor flavor>
 905 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 906 angles(int             nbonds,
 907        const t_iatom   forceatoms[],
 908        const t_iparams forceparams[],
 909        const rvec      x[],
 910        rvec4           f[],
 911        rvec            fshift[],
 912        const t_pbc*    pbc,
 913        const t_graph*  g,
 914        real            lambda,
 915        real*           dvdlambda,
 916        const t_mdatoms gmx_unused* md,
 917        t_fcdata gmx_unused* fcd,
 918        int gmx_unused* global_atom_index)
 919 {
 920     int  i, ai, aj, ak, t1, t2, type;
 921     rvec r_ij, r_kj;
 922     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 923     ivec jt, dt_ij, dt_kj;
 924
 925     vtot = 0.0;
 926     for (i = 0; i < nbonds;)
 927     {
 928         type = forceatoms[i++];
 929         ai   = forceatoms[i++];
 930         aj   = forceatoms[i++];
 931         ak   = forceatoms[i++];
 932
 933         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
 934
 935         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 936                                forceparams[type].harmonic.rA * DEG2RAD,
 937                                forceparams[type].harmonic.rB * DEG2RAD, theta, lambda, &va, &dVdt); /*  21  */
 938         vtot += va;
 939
 940         cos_theta2 = gmx::square(cos_theta);
 941         if (cos_theta2 < 1)
 942         {
 943             int  m;
 944             real st, sth;
 945             real cik, cii, ckk;
 946             real nrkj2, nrij2;
 947             real nrkj_1, nrij_1;
 948             rvec f_i, f_j, f_k;
 949
 950             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
 951             sth   = st * cos_theta;                      /*   1         */
 952             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
 953             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
 954
 955             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
 956             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
 957
 958             cik = st * nrij_1 * nrkj_1;  /*   2         */
 959             cii = sth * nrij_1 * nrij_1; /*   2         */
 960             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
 961
 962             for (m = 0; m < DIM; m++)
 963             { /*  39            */
 964                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
 965                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
 966                 f_j[m] = -f_i[m] - f_k[m];
 967                 f[ai][m] += f_i[m];
 968                 f[aj][m] += f_j[m];
 969                 f[ak][m] += f_k[m];
 970             }
 971             if (computeVirial(flavor))
 972             {
 973                 if (g != nullptr)
 974                 {
 975                     copy_ivec(SHIFT_IVEC(g, aj), jt);
 976
 977                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
 978                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
 979                     t1 = IVEC2IS(dt_ij);
 980                     t2 = IVEC2IS(dt_kj);
 981                 }
 982                 rvec_inc(fshift[t1], f_i);
 983                 rvec_inc(fshift[CENTRAL], f_j);
 984                 rvec_inc(fshift[t2], f_k);
 985             }
 986         } /* 161 TOTAL  */
 987     }
 988
 989     return vtot;
 990 }
 991
 992 #ifdef avoid_gcc_i386_o3_code_generation_bug
 993 #    pragma GCC pop_options
 994 #    undef avoid_gcc_i386_o3_code_generation_bug
 995 #endif
 996
 997 #if GMX_SIMD_HAVE_REAL
 998
 999 /* As angles, but using SIMD to calculate many angles at once.
1000  * This routines does not calculate energies and shift forces.
1001  */
1002 template<BondedKernelFlavor flavor>
1003 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1004 angles(int             nbonds,
1005        const t_iatom   forceatoms[],
1006        const t_iparams forceparams[],
1007        const rvec      x[],
1008        rvec4           f[],
1009        rvec gmx_unused fshift[],
1010        const t_pbc*    pbc,
1011        const t_graph gmx_unused* g,
1012        real gmx_unused lambda,
1013        real gmx_unused* dvdlambda,
1014        const t_mdatoms gmx_unused* md,
1015        t_fcdata gmx_unused* fcd,
1016        int gmx_unused* global_atom_index)
1017 {
1018     const int                                nfa1 = 4;
1019     int                                      i, iu, s;
1020     int                                      type;
1021     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1022     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1023     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1024     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
1025     SimdReal                                 deg2rad_S(DEG2RAD);
1026     SimdReal                                 xi_S, yi_S, zi_S;
1027     SimdReal                                 xj_S, yj_S, zj_S;
1028     SimdReal                                 xk_S, yk_S, zk_S;
1029     SimdReal                                 k_S, theta0_S;
1030     SimdReal                                 rijx_S, rijy_S, rijz_S;
1031     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
1032     SimdReal                                 one_S(1.0);
1033     SimdReal min_one_plus_eps_S(-1.0 + 2.0 * GMX_REAL_EPS); // Smallest number > -1
1034
1035     SimdReal                         rij_rkj_S;
1036     SimdReal                         nrij2_S, nrij_1_S;
1037     SimdReal                         nrkj2_S, nrkj_1_S;
1038     SimdReal                         cos_S, invsin_S;
1039     SimdReal                         theta_S;
1040     SimdReal                         st_S, sth_S;
1041     SimdReal                         cik_S, cii_S, ckk_S;
1042     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1043     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1044     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1045
1046     set_pbc_simd(pbc, pbc_simd);
1047
1048     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1049     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1050     {
1051         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1052          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1053          */
1054         iu = i;
1055         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1056         {
1057             type  = forceatoms[iu];
1058             ai[s] = forceatoms[iu + 1];
1059             aj[s] = forceatoms[iu + 2];
1060             ak[s] = forceatoms[iu + 3];
1061
1062             /* At the end fill the arrays with the last atoms and 0 params */
1063             if (i + s * nfa1 < nbonds)
1064             {
1065                 coeff[s]                       = forceparams[type].harmonic.krA;
1066                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1067
1068                 if (iu + nfa1 < nbonds)
1069                 {
1070                     iu += nfa1;
1071                 }
1072             }
1073             else
1074             {
1075                 coeff[s]                       = 0;
1076                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1077             }
1078         }
1079
1080         /* Store the non PBC corrected distances packed and aligned */
1081         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1082         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1083         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1084         rijx_S = xi_S - xj_S;
1085         rijy_S = yi_S - yj_S;
1086         rijz_S = zi_S - zj_S;
1087         rkjx_S = xk_S - xj_S;
1088         rkjy_S = yk_S - yj_S;
1089         rkjz_S = zk_S - zj_S;
1090
1091         k_S      = load<SimdReal>(coeff);
1092         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1093
1094         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1095         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1096
1097         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1098
1099         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1100         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1101
1102         nrij_1_S = invsqrt(nrij2_S);
1103         nrkj_1_S = invsqrt(nrkj2_S);
1104
1105         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1106
1107         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1108          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1109          * This also ensures that rounding errors would cause the argument
1110          * of simdAcos to be < -1.
1111          * Note that we do not take precautions for cos(0)=1, so the outer
1112          * atoms in an angle should not be on top of each other.
1113          */
1114         cos_S = max(cos_S, min_one_plus_eps_S);
1115
1116         theta_S = acos(cos_S);
1117
1118         invsin_S = invsqrt(one_S - cos_S * cos_S);
1119
1120         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1121         sth_S = st_S * cos_S;
1122
1123         cik_S = st_S * nrij_1_S * nrkj_1_S;
1124         cii_S = sth_S * nrij_1_S * nrij_1_S;
1125         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1126
1127         f_ix_S = cii_S * rijx_S;
1128         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1129         f_iy_S = cii_S * rijy_S;
1130         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1131         f_iz_S = cii_S * rijz_S;
1132         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1133         f_kx_S = ckk_S * rkjx_S;
1134         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1135         f_ky_S = ckk_S * rkjy_S;
1136         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1137         f_kz_S = ckk_S * rkjz_S;
1138         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1139
1140         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1141         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1142                                  f_iz_S + f_kz_S);
1143         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1144     }
1145
1146     return 0;
1147 }
1148
1149 #endif // GMX_SIMD_HAVE_REAL
1150
1151 template<BondedKernelFlavor flavor>
1152 real linear_angles(int             nbonds,
1153                    const t_iatom   forceatoms[],
1154                    const t_iparams forceparams[],
1155                    const rvec      x[],
1156                    rvec4           f[],
1157                    rvec            fshift[],
1158                    const t_pbc*    pbc,
1159                    const t_graph*  g,
1160                    real            lambda,
1161                    real*           dvdlambda,
1162                    const t_mdatoms gmx_unused* md,
1163                    t_fcdata gmx_unused* fcd,
1164                    int gmx_unused* global_atom_index)
1165 {
1166     int  i, m, ai, aj, ak, t1, t2, type;
1167     rvec f_i, f_j, f_k;
1168     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1169     ivec jt, dt_ij, dt_kj;
1170     rvec r_ij, r_kj, r_ik, dx;
1171
1172     L1   = 1 - lambda;
1173     vtot = 0.0;
1174     for (i = 0; (i < nbonds);)
1175     {
1176         type = forceatoms[i++];
1177         ai   = forceatoms[i++];
1178         aj   = forceatoms[i++];
1179         ak   = forceatoms[i++];
1180
1181         kA   = forceparams[type].linangle.klinA;
1182         kB   = forceparams[type].linangle.klinB;
1183         klin = L1 * kA + lambda * kB;
1184
1185         aA = forceparams[type].linangle.aA;
1186         aB = forceparams[type].linangle.aB;
1187         a  = L1 * aA + lambda * aB;
1188         b  = 1 - a;
1189
1190         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1191         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1192         rvec_sub(r_ij, r_kj, r_ik);
1193
1194         dr2 = 0;
1195         for (m = 0; (m < DIM); m++)
1196         {
1197             dr = -a * r_ij[m] - b * r_kj[m];
1198             dr2 += dr * dr;
1199             dx[m]  = dr;
1200             f_i[m] = a * klin * dr;
1201             f_k[m] = b * klin * dr;
1202             f_j[m] = -(f_i[m] + f_k[m]);
1203             f[ai][m] += f_i[m];
1204             f[aj][m] += f_j[m];
1205             f[ak][m] += f_k[m];
1206         }
1207         va = 0.5 * klin * dr2;
1208         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1209
1210         vtot += va;
1211
1212         if (computeVirial(flavor))
1213         {
1214             if (g)
1215             {
1216                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1217
1218                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1219                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1220                 t1 = IVEC2IS(dt_ij);
1221                 t2 = IVEC2IS(dt_kj);
1222             }
1223             rvec_inc(fshift[t1], f_i);
1224             rvec_inc(fshift[CENTRAL], f_j);
1225             rvec_inc(fshift[t2], f_k);
1226         }
1227     } /* 57 TOTAL       */
1228     return vtot;
1229 }
1230
1231 template<BondedKernelFlavor flavor>
1232 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1233 urey_bradley(int             nbonds,
1234              const t_iatom   forceatoms[],
1235              const t_iparams forceparams[],
1236              const rvec      x[],
1237              rvec4           f[],
1238              rvec            fshift[],
1239              const t_pbc*    pbc,
1240              const t_graph*  g,
1241              real            lambda,
1242              real*           dvdlambda,
1243              const t_mdatoms gmx_unused* md,
1244              t_fcdata gmx_unused* fcd,
1245              int gmx_unused* global_atom_index)
1246 {
1247     int  i, m, ai, aj, ak, t1, t2, type, ki;
1248     rvec r_ij, r_kj, r_ik;
1249     real cos_theta, cos_theta2, theta;
1250     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1251     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1252     ivec jt, dt_ij, dt_kj, dt_ik;
1253
1254     vtot = 0.0;
1255     for (i = 0; (i < nbonds);)
1256     {
1257         type = forceatoms[i++];
1258         ai   = forceatoms[i++];
1259         aj   = forceatoms[i++];
1260         ak   = forceatoms[i++];
1261         th0A = forceparams[type].u_b.thetaA * DEG2RAD;
1262         kthA = forceparams[type].u_b.kthetaA;
1263         r13A = forceparams[type].u_b.r13A;
1264         kUBA = forceparams[type].u_b.kUBA;
1265         th0B = forceparams[type].u_b.thetaB * DEG2RAD;
1266         kthB = forceparams[type].u_b.kthetaB;
1267         r13B = forceparams[type].u_b.r13B;
1268         kUBB = forceparams[type].u_b.kUBB;
1269
1270         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1271
1272         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1273         vtot += va;
1274
1275         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1276         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1277         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1278
1279         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1280
1281         cos_theta2 = gmx::square(cos_theta); /*   1             */
1282         if (cos_theta2 < 1)
1283         {
1284             real st, sth;
1285             real cik, cii, ckk;
1286             real nrkj2, nrij2;
1287             rvec f_i, f_j, f_k;
1288
1289             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1290             sth   = st * cos_theta;                      /*   1         */
1291             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1292             nrij2 = iprod(r_ij, r_ij);
1293
1294             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1295             cii = sth / nrij2;                      /*  10              */
1296             ckk = sth / nrkj2;                      /*  10              */
1297
1298             for (m = 0; (m < DIM); m++) /*  39          */
1299             {
1300                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1301                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1302                 f_j[m] = -f_i[m] - f_k[m];
1303                 f[ai][m] += f_i[m];
1304                 f[aj][m] += f_j[m];
1305                 f[ak][m] += f_k[m];
1306             }
1307             if (computeVirial(flavor))
1308             {
1309                 if (g)
1310                 {
1311                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1312
1313                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1314                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1315                     t1 = IVEC2IS(dt_ij);
1316                     t2 = IVEC2IS(dt_kj);
1317                 }
1318                 rvec_inc(fshift[t1], f_i);
1319                 rvec_inc(fshift[CENTRAL], f_j);
1320                 rvec_inc(fshift[t2], f_k);
1321             }
1322         } /* 161 TOTAL  */
1323         /* Time for the bond calculations */
1324         if (dr2 == 0.0)
1325         {
1326             continue;
1327         }
1328
1329         vtot += vbond;              /* 1*/
1330         fbond *= gmx::invsqrt(dr2); /*   6              */
1331
1332         if (computeVirial(flavor) && g)
1333         {
1334             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1335             ki = IVEC2IS(dt_ik);
1336         }
1337         for (m = 0; (m < DIM); m++) /*  15              */
1338         {
1339             fik = fbond * r_ik[m];
1340             f[ai][m] += fik;
1341             f[ak][m] -= fik;
1342             if (computeVirial(flavor))
1343             {
1344                 fshift[ki][m] += fik;
1345                 fshift[CENTRAL][m] -= fik;
1346             }
1347         }
1348     }
1349     return vtot;
1350 }
1351
1352 #if GMX_SIMD_HAVE_REAL
1353
1354 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1355  * This routines does not calculate energies and shift forces.
1356  */
1357 template<BondedKernelFlavor flavor>
1358 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1359 urey_bradley(int             nbonds,
1360              const t_iatom   forceatoms[],
1361              const t_iparams forceparams[],
1362              const rvec      x[],
1363              rvec4           f[],
1364              rvec gmx_unused fshift[],
1365              const t_pbc*    pbc,
1366              const t_graph gmx_unused* g,
1367              real gmx_unused lambda,
1368              real gmx_unused* dvdlambda,
1369              const t_mdatoms gmx_unused* md,
1370              t_fcdata gmx_unused* fcd,
1371              int gmx_unused* global_atom_index)
1372 {
1373     constexpr int                            nfa1 = 4;
1374     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1375     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1376     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1377     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1378     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1379
1380     set_pbc_simd(pbc, pbc_simd);
1381
1382     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1383     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1384     {
1385         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1386          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1387          */
1388         int iu = i;
1389         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1390         {
1391             const int type = forceatoms[iu];
1392             ai[s]          = forceatoms[iu + 1];
1393             aj[s]          = forceatoms[iu + 2];
1394             ak[s]          = forceatoms[iu + 3];
1395
1396             /* At the end fill the arrays with the last atoms and 0 params */
1397             if (i + s * nfa1 < nbonds)
1398             {
1399                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1400                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1401                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1402                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1403
1404                 if (iu + nfa1 < nbonds)
1405                 {
1406                     iu += nfa1;
1407                 }
1408             }
1409             else
1410             {
1411                 coeff[s]                           = 0;
1412                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1413                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1414                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1415             }
1416         }
1417
1418         SimdReal xi_S, yi_S, zi_S;
1419         SimdReal xj_S, yj_S, zj_S;
1420         SimdReal xk_S, yk_S, zk_S;
1421
1422         /* Store the non PBC corrected distances packed and aligned */
1423         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1424         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1425         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1426         SimdReal rijx_S = xi_S - xj_S;
1427         SimdReal rijy_S = yi_S - yj_S;
1428         SimdReal rijz_S = zi_S - zj_S;
1429         SimdReal rkjx_S = xk_S - xj_S;
1430         SimdReal rkjy_S = yk_S - yj_S;
1431         SimdReal rkjz_S = zk_S - zj_S;
1432         SimdReal rikx_S = xi_S - xk_S;
1433         SimdReal riky_S = yi_S - yk_S;
1434         SimdReal rikz_S = zi_S - zk_S;
1435
1436         const SimdReal ktheta_S = load<SimdReal>(coeff);
1437         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1438         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1439         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1440
1441         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1442         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1443         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1444
1445         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1446
1447         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1448
1449         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1450         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1451
1452         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1453         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1454         const SimdReal invdr2_S = invsqrt(dr2_S);
1455         const SimdReal dr_S     = dr2_S * invdr2_S;
1456
1457         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1458
1459         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1460          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1461          * This also ensures that rounding errors would cause the argument
1462          * of simdAcos to be < -1.
1463          * Note that we do not take precautions for cos(0)=1, so the bonds
1464          * in an angle should not align at an angle of 0 degrees.
1465          */
1466         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1467
1468         const SimdReal theta_S  = acos(cos_S);
1469         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1470         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1471         const SimdReal sth_S    = st_S * cos_S;
1472
1473         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1474         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1475         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1476
1477         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1478
1479         const SimdReal f_ikx_S = sUB_S * rikx_S;
1480         const SimdReal f_iky_S = sUB_S * riky_S;
1481         const SimdReal f_ikz_S = sUB_S * rikz_S;
1482
1483         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1484         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1485         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1486         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1487         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1488         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1489
1490         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1491         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1492                                  f_iz_S + f_kz_S);
1493         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1494     }
1495
1496     return 0;
1497 }
1498
1499 #endif // GMX_SIMD_HAVE_REAL
1500
1501 template<BondedKernelFlavor flavor>
1502 real quartic_angles(int             nbonds,
1503                     const t_iatom   forceatoms[],
1504                     const t_iparams forceparams[],
1505                     const rvec      x[],
1506                     rvec4           f[],
1507                     rvec            fshift[],
1508                     const t_pbc*    pbc,
1509                     const t_graph*  g,
1510                     real gmx_unused lambda,
1511                     real gmx_unused* dvdlambda,
1512                     const t_mdatoms gmx_unused* md,
1513                     t_fcdata gmx_unused* fcd,
1514                     int gmx_unused* global_atom_index)
1515 {
1516     int  i, j, ai, aj, ak, t1, t2, type;
1517     rvec r_ij, r_kj;
1518     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1519     ivec jt, dt_ij, dt_kj;
1520
1521     vtot = 0.0;
1522     for (i = 0; (i < nbonds);)
1523     {
1524         type = forceatoms[i++];
1525         ai   = forceatoms[i++];
1526         aj   = forceatoms[i++];
1527         ak   = forceatoms[i++];
1528
1529         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1530
1531         dt = theta - forceparams[type].qangle.theta * DEG2RAD; /* 2          */
1532
1533         dVdt = 0;
1534         va   = forceparams[type].qangle.c[0];
1535         dtp  = 1.0;
1536         for (j = 1; j <= 4; j++)
1537         {
1538             c = forceparams[type].qangle.c[j];
1539             dVdt -= j * c * dtp;
1540             dtp *= dt;
1541             va += c * dtp;
1542         }
1543         /* 20 */
1544
1545         vtot += va;
1546
1547         cos_theta2 = gmx::square(cos_theta); /*   1             */
1548         if (cos_theta2 < 1)
1549         {
1550             int  m;
1551             real st, sth;
1552             real cik, cii, ckk;
1553             real nrkj2, nrij2;
1554             rvec f_i, f_j, f_k;
1555
1556             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1557             sth   = st * cos_theta;                      /*   1         */
1558             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1559             nrij2 = iprod(r_ij, r_ij);
1560
1561             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1562             cii = sth / nrij2;                      /*  10              */
1563             ckk = sth / nrkj2;                      /*  10              */
1564
1565             for (m = 0; (m < DIM); m++) /*  39          */
1566             {
1567                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1568                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1569                 f_j[m] = -f_i[m] - f_k[m];
1570                 f[ai][m] += f_i[m];
1571                 f[aj][m] += f_j[m];
1572                 f[ak][m] += f_k[m];
1573             }
1574
1575             if (computeVirial(flavor))
1576             {
1577                 if (g)
1578                 {
1579                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1580
1581                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1582                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1583                     t1 = IVEC2IS(dt_ij);
1584                     t2 = IVEC2IS(dt_kj);
1585                 }
1586                 rvec_inc(fshift[t1], f_i);
1587                 rvec_inc(fshift[CENTRAL], f_j);
1588                 rvec_inc(fshift[t2], f_k);
1589             }
1590         } /* 153 TOTAL  */
1591     }
1592     return vtot;
1593 }
1594
1595
1596 #if GMX_SIMD_HAVE_REAL
1597
1598 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1599  * also calculates the pre-factor required for the dihedral force update.
1600  * Note that bv and buf should be register aligned.
1601  */
1602 inline void dih_angle_simd(const rvec* x,
1603                            const int*  ai,
1604                            const int*  aj,
1605                            const int*  ak,
1606                            const int*  al,
1607                            const real* pbc_simd,
1608                            SimdReal*   phi_S,
1609                            SimdReal*   mx_S,
1610                            SimdReal*   my_S,
1611                            SimdReal*   mz_S,
1612                            SimdReal*   nx_S,
1613                            SimdReal*   ny_S,
1614                            SimdReal*   nz_S,
1615                            SimdReal*   nrkj_m2_S,
1616                            SimdReal*   nrkj_n2_S,
1617                            SimdReal*   p_S,
1618                            SimdReal*   q_S)
1619 {
1620     SimdReal xi_S, yi_S, zi_S;
1621     SimdReal xj_S, yj_S, zj_S;
1622     SimdReal xk_S, yk_S, zk_S;
1623     SimdReal xl_S, yl_S, zl_S;
1624     SimdReal rijx_S, rijy_S, rijz_S;
1625     SimdReal rkjx_S, rkjy_S, rkjz_S;
1626     SimdReal rklx_S, rkly_S, rklz_S;
1627     SimdReal cx_S, cy_S, cz_S;
1628     SimdReal cn_S;
1629     SimdReal s_S;
1630     SimdReal ipr_S;
1631     SimdReal iprm_S, iprn_S;
1632     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1633     SimdReal toler_S;
1634     SimdReal nrkj2_min_S;
1635     SimdReal real_eps_S;
1636
1637     /* Used to avoid division by zero.
1638      * We take into acount that we multiply the result by real_eps_S.
1639      */
1640     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1641
1642     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1643     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1644
1645     /* Store the non PBC corrected distances packed and aligned */
1646     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1647     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1648     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1649     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1650     rijx_S = xi_S - xj_S;
1651     rijy_S = yi_S - yj_S;
1652     rijz_S = zi_S - zj_S;
1653     rkjx_S = xk_S - xj_S;
1654     rkjy_S = yk_S - yj_S;
1655     rkjz_S = zk_S - zj_S;
1656     rklx_S = xk_S - xl_S;
1657     rkly_S = yk_S - yl_S;
1658     rklz_S = zk_S - zl_S;
1659
1660     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1661     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1662     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1663
1664     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1665
1666     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1667
1668     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1669
1670     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1671
1672     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1673
1674     /* Determine the dihedral angle, the sign might need correction */
1675     *phi_S = atan2(cn_S, s_S);
1676
1677     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1678
1679     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1680     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1681
1682     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1683
1684     /* Avoid division by zero. When zero, the result is multiplied by 0
1685      * anyhow, so the 3 max below do not affect the final result.
1686      */
1687     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1688     nrkj_1_S = invsqrt(nrkj2_S);
1689     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1690     nrkj_S   = nrkj2_S * nrkj_1_S;
1691
1692     toler_S = nrkj2_S * real_eps_S;
1693
1694     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1695      * So we take a max with the tolerance instead. Since we multiply with
1696      * m or n later, the max does not affect the results.
1697      */
1698     iprm_S     = max(iprm_S, toler_S);
1699     iprn_S     = max(iprn_S, toler_S);
1700     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1701     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1702
1703     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1704     *phi_S = copysign(*phi_S, ipr_S);
1705     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1706     *p_S   = *p_S * nrkj_2_S;
1707
1708     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1709     *q_S = *q_S * nrkj_2_S;
1710 }
1711
1712 #endif // GMX_SIMD_HAVE_REAL
1713
1714 } // namespace
1715
1716 template<BondedKernelFlavor flavor>
1717 void do_dih_fup(int            i,
1718                 int            j,
1719                 int            k,
1720                 int            l,
1721                 real           ddphi,
1722                 rvec           r_ij,
1723                 rvec           r_kj,
1724                 rvec           r_kl,
1725                 rvec           m,
1726                 rvec           n,
1727                 rvec4          f[],
1728                 rvec           fshift[],
1729                 const t_pbc*   pbc,
1730                 const t_graph* g,
1731                 const rvec     x[],
1732                 int            t1,
1733                 int            t2,
1734                 int            t3)
1735 {
1736     /* 143 FLOPS */
1737     rvec f_i, f_j, f_k, f_l;
1738     rvec uvec, vvec, svec, dx_jl;
1739     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1740     real a, b, p, q, toler;
1741     ivec jt, dt_ij, dt_kj, dt_lj;
1742
1743     iprm  = iprod(m, m);       /*  5    */
1744     iprn  = iprod(n, n);       /*  5    */
1745     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1746     toler = nrkj2 * GMX_REAL_EPS;
1747     if ((iprm > toler) && (iprn > toler))
1748     {
1749         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1750         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1751         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1752         a      = -ddphi * nrkj / iprm; /* 11    */
1753         svmul(a, m, f_i);              /*  3    */
1754         b = ddphi * nrkj / iprn;       /* 11    */
1755         svmul(b, n, f_l);              /*  3  */
1756         p = iprod(r_ij, r_kj);         /*  5    */
1757         p *= nrkj_2;                   /*  1    */
1758         q = iprod(r_kl, r_kj);         /*  5    */
1759         q *= nrkj_2;                   /*  1    */
1760         svmul(p, f_i, uvec);           /*  3    */
1761         svmul(q, f_l, vvec);           /*  3    */
1762         rvec_sub(uvec, vvec, svec);    /*  3    */
1763         rvec_sub(f_i, svec, f_j);      /*  3    */
1764         rvec_add(f_l, svec, f_k);      /*  3    */
1765         rvec_inc(f[i], f_i);           /*  3    */
1766         rvec_dec(f[j], f_j);           /*  3    */
1767         rvec_dec(f[k], f_k);           /*  3    */
1768         rvec_inc(f[l], f_l);           /*  3    */
1769
1770         if (computeVirial(flavor))
1771         {
1772             if (g)
1773             {
1774                 copy_ivec(SHIFT_IVEC(g, j), jt);
1775                 ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1776                 ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1777                 ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1778                 t1 = IVEC2IS(dt_ij);
1779                 t2 = IVEC2IS(dt_kj);
1780                 t3 = IVEC2IS(dt_lj);
1781             }
1782             else if (pbc)
1783             {
1784                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1785             }
1786             else
1787             {
1788                 t3 = CENTRAL;
1789             }
1790
1791             rvec_inc(fshift[t1], f_i);
1792             rvec_dec(fshift[CENTRAL], f_j);
1793             rvec_dec(fshift[t2], f_k);
1794             rvec_inc(fshift[t3], f_l);
1795         }
1796     }
1797     /* 112 TOTAL    */
1798 }
1799
1800 namespace
1801 {
1802
1803 #if GMX_SIMD_HAVE_REAL
1804 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1805 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1806                                                   const int* aj,
1807                                                   const int* ak,
1808                                                   const int* al,
1809                                                   SimdReal   p,
1810                                                   SimdReal   q,
1811                                                   SimdReal   f_i_x,
1812                                                   SimdReal   f_i_y,
1813                                                   SimdReal   f_i_z,
1814                                                   SimdReal   mf_l_x,
1815                                                   SimdReal   mf_l_y,
1816                                                   SimdReal   mf_l_z,
1817                                                   rvec4      f[])
1818 {
1819     SimdReal sx    = p * f_i_x + q * mf_l_x;
1820     SimdReal sy    = p * f_i_y + q * mf_l_y;
1821     SimdReal sz    = p * f_i_z + q * mf_l_z;
1822     SimdReal f_j_x = f_i_x - sx;
1823     SimdReal f_j_y = f_i_y - sy;
1824     SimdReal f_j_z = f_i_z - sz;
1825     SimdReal f_k_x = mf_l_x - sx;
1826     SimdReal f_k_y = mf_l_y - sy;
1827     SimdReal f_k_z = mf_l_z - sz;
1828     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1829     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1830     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1831     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1832 }
1833 #endif // GMX_SIMD_HAVE_REAL
1834
1835 /*! \brief Computes and returns the proper dihedral force
1836  *
1837  * With the appropriate kernel flavor, also computes and accumulates
1838  * the energy and dV/dlambda.
1839  */
1840 template<BondedKernelFlavor flavor>
1841 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1842 {
1843     const real L1   = 1.0 - lambda;
1844     const real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1845     const real dph0 = (phiB - phiA) * DEG2RAD;
1846     const real cp   = L1 * cpA + lambda * cpB;
1847
1848     const real mdphi = mult * phi - ph0;
1849     const real sdphi = std::sin(mdphi);
1850     const real ddphi = -cp * mult * sdphi;
1851     if (computeEnergy(flavor))
1852     {
1853         const real v1 = 1 + std::cos(mdphi);
1854         *V += cp * v1;
1855
1856         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1857     }
1858
1859     return ddphi;
1860     /* That was 40 flops */
1861 }
1862
1863 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1864 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1865 {
1866     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1867     real L1   = 1.0 - lambda;
1868     real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1869     real dph0 = (phiB - phiA) * DEG2RAD;
1870     real cp   = L1 * cpA + lambda * cpB;
1871
1872     mdphi = mult * (phi - ph0);
1873     sdphi = std::sin(mdphi);
1874     ddphi = cp * mult * sdphi;
1875     v1    = 1.0 - std::cos(mdphi);
1876     v     = cp * v1;
1877
1878     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1879
1880     *V = v;
1881     *F = ddphi;
1882
1883     return dvdlambda;
1884
1885     /* That was 40 flops */
1886 }
1887
1888 template<BondedKernelFlavor flavor>
1889 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1890 pdihs(int             nbonds,
1891       const t_iatom   forceatoms[],
1892       const t_iparams forceparams[],
1893       const rvec      x[],
1894       rvec4           f[],
1895       rvec            fshift[],
1896       const t_pbc*    pbc,
1897       const t_graph*  g,
1898       real            lambda,
1899       real*           dvdlambda,
1900       const t_mdatoms gmx_unused* md,
1901       t_fcdata gmx_unused* fcd,
1902       int gmx_unused* global_atom_index)
1903 {
1904     int  t1, t2, t3;
1905     rvec r_ij, r_kj, r_kl, m, n;
1906
1907     real vtot = 0.0;
1908
1909     for (int i = 0; i < nbonds;)
1910     {
1911         const int ai = forceatoms[i + 1];
1912         const int aj = forceatoms[i + 2];
1913         const int ak = forceatoms[i + 3];
1914         const int al = forceatoms[i + 4];
1915
1916         const real phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1,
1917                                    &t2, &t3); /*  84      */
1918
1919         /* Loop over dihedrals working on the same atoms,
1920          * so we avoid recalculating angles and distributing forces.
1921          */
1922         real ddphi_tot = 0;
1923         do
1924         {
1925             const int type = forceatoms[i];
1926             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
1927                                          forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
1928                                          forceparams[type].pdihs.mult, phi, lambda, &vtot, dvdlambda);
1929
1930             i += 5;
1931         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
1932                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
1933
1934         do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
1935                            t1, t2, t3); /* 112          */
1936     }                                   /* 223 TOTAL  */
1937
1938     return vtot;
1939 }
1940
1941 #if GMX_SIMD_HAVE_REAL
1942
1943 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
1944 template<BondedKernelFlavor flavor>
1945 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1946 pdihs(int             nbonds,
1947       const t_iatom   forceatoms[],
1948       const t_iparams forceparams[],
1949       const rvec      x[],
1950       rvec4           f[],
1951       rvec gmx_unused fshift[],
1952       const t_pbc*    pbc,
1953       const t_graph gmx_unused* g,
1954       real gmx_unused lambda,
1955       real gmx_unused* dvdlambda,
1956       const t_mdatoms gmx_unused* md,
1957       t_fcdata gmx_unused* fcd,
1958       int gmx_unused* global_atom_index)
1959 {
1960     const int                                nfa1 = 5;
1961     int                                      i, iu, s;
1962     int                                      type;
1963     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1964     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1965     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1966     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
1967     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
1968     real *                                   cp, *phi0, *mult;
1969     SimdReal                                 deg2rad_S(DEG2RAD);
1970     SimdReal                                 p_S, q_S;
1971     SimdReal                                 phi0_S, phi_S;
1972     SimdReal                                 mx_S, my_S, mz_S;
1973     SimdReal                                 nx_S, ny_S, nz_S;
1974     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
1975     SimdReal                                 cp_S, mdphi_S, mult_S;
1976     SimdReal                                 sin_S, cos_S;
1977     SimdReal                                 mddphi_S;
1978     SimdReal                                 sf_i_S, msf_l_S;
1979     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1980
1981     /* Extract aligned pointer for parameters and variables */
1982     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
1983     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
1984     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
1985
1986     set_pbc_simd(pbc, pbc_simd);
1987
1988     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1989     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1990     {
1991         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1992          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1993          */
1994         iu = i;
1995         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1996         {
1997             type  = forceatoms[iu];
1998             ai[s] = forceatoms[iu + 1];
1999             aj[s] = forceatoms[iu + 2];
2000             ak[s] = forceatoms[iu + 3];
2001             al[s] = forceatoms[iu + 4];
2002
2003             /* At the end fill the arrays with the last atoms and 0 params */
2004             if (i + s * nfa1 < nbonds)
2005             {
2006                 cp[s]   = forceparams[type].pdihs.cpA;
2007                 phi0[s] = forceparams[type].pdihs.phiA;
2008                 mult[s] = forceparams[type].pdihs.mult;
2009
2010                 if (iu + nfa1 < nbonds)
2011                 {
2012                     iu += nfa1;
2013                 }
2014             }
2015             else
2016             {
2017                 cp[s]   = 0;
2018                 phi0[s] = 0;
2019                 mult[s] = 0;
2020             }
2021         }
2022
2023         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2024         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2025                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2026
2027         cp_S   = load<SimdReal>(cp);
2028         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
2029         mult_S = load<SimdReal>(mult);
2030
2031         mdphi_S = fms(mult_S, phi_S, phi0_S);
2032
2033         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2034         sincos(mdphi_S, &sin_S, &cos_S);
2035         mddphi_S = cp_S * mult_S * sin_S;
2036         sf_i_S   = mddphi_S * nrkj_m2_S;
2037         msf_l_S  = mddphi_S * nrkj_n2_S;
2038
2039         /* After this m?_S will contain f[i] */
2040         mx_S = sf_i_S * mx_S;
2041         my_S = sf_i_S * my_S;
2042         mz_S = sf_i_S * mz_S;
2043
2044         /* After this m?_S will contain -f[l] */
2045         nx_S = msf_l_S * nx_S;
2046         ny_S = msf_l_S * ny_S;
2047         nz_S = msf_l_S * nz_S;
2048
2049         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2050     }
2051
2052     return 0;
2053 }
2054
2055 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
2056  * the RB potential instead of a harmonic potential.
2057  * This function can replace rbdihs() when no energy and virial are needed.
2058  */
2059 template<BondedKernelFlavor flavor>
2060 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2061 rbdihs(int             nbonds,
2062        const t_iatom   forceatoms[],
2063        const t_iparams forceparams[],
2064        const rvec      x[],
2065        rvec4           f[],
2066        rvec gmx_unused fshift[],
2067        const t_pbc*    pbc,
2068        const t_graph gmx_unused* g,
2069        real gmx_unused lambda,
2070        real gmx_unused* dvdlambda,
2071        const t_mdatoms gmx_unused* md,
2072        t_fcdata gmx_unused* fcd,
2073        int gmx_unused* global_atom_index)
2074 {
2075     const int                                nfa1 = 5;
2076     int                                      i, iu, s, j;
2077     int                                      type;
2078     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2079     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2080     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2081     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2082     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
2083
2084     SimdReal                         p_S, q_S;
2085     SimdReal                         phi_S;
2086     SimdReal                         ddphi_S, cosfac_S;
2087     SimdReal                         mx_S, my_S, mz_S;
2088     SimdReal                         nx_S, ny_S, nz_S;
2089     SimdReal                         nrkj_m2_S, nrkj_n2_S;
2090     SimdReal                         parm_S, c_S;
2091     SimdReal                         sin_S, cos_S;
2092     SimdReal                         sf_i_S, msf_l_S;
2093     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2094
2095     SimdReal pi_S(M_PI);
2096     SimdReal one_S(1.0);
2097
2098     set_pbc_simd(pbc, pbc_simd);
2099
2100     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2101     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2102     {
2103         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2104          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2105          */
2106         iu = i;
2107         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2108         {
2109             type  = forceatoms[iu];
2110             ai[s] = forceatoms[iu + 1];
2111             aj[s] = forceatoms[iu + 2];
2112             ak[s] = forceatoms[iu + 3];
2113             al[s] = forceatoms[iu + 4];
2114
2115             /* At the end fill the arrays with the last atoms and 0 params */
2116             if (i + s * nfa1 < nbonds)
2117             {
2118                 /* We don't need the first parameter, since that's a constant
2119                  * which only affects the energies, not the forces.
2120                  */
2121                 for (j = 1; j < NR_RBDIHS; j++)
2122                 {
2123                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2124                 }
2125
2126                 if (iu + nfa1 < nbonds)
2127                 {
2128                     iu += nfa1;
2129                 }
2130             }
2131             else
2132             {
2133                 for (j = 1; j < NR_RBDIHS; j++)
2134                 {
2135                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2136                 }
2137             }
2138         }
2139
2140         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2141         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2142                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2143
2144         /* Change to polymer convention */
2145         phi_S = phi_S - pi_S;
2146
2147         sincos(phi_S, &sin_S, &cos_S);
2148
2149         ddphi_S  = setZero();
2150         c_S      = one_S;
2151         cosfac_S = one_S;
2152         for (j = 1; j < NR_RBDIHS; j++)
2153         {
2154             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2155             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2156             cosfac_S = cosfac_S * cos_S;
2157             c_S      = c_S + one_S;
2158         }
2159
2160         /* Note that here we do not use the minus sign which is present
2161          * in the normal RB code. This is corrected for through (m)sf below.
2162          */
2163         ddphi_S = ddphi_S * sin_S;
2164
2165         sf_i_S  = ddphi_S * nrkj_m2_S;
2166         msf_l_S = ddphi_S * nrkj_n2_S;
2167
2168         /* After this m?_S will contain f[i] */
2169         mx_S = sf_i_S * mx_S;
2170         my_S = sf_i_S * my_S;
2171         mz_S = sf_i_S * mz_S;
2172
2173         /* After this m?_S will contain -f[l] */
2174         nx_S = msf_l_S * nx_S;
2175         ny_S = msf_l_S * ny_S;
2176         nz_S = msf_l_S * nz_S;
2177
2178         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2179     }
2180
2181     return 0;
2182 }
2183
2184 #endif // GMX_SIMD_HAVE_REAL
2185
2186
2187 template<BondedKernelFlavor flavor>
2188 real idihs(int             nbonds,
2189            const t_iatom   forceatoms[],
2190            const t_iparams forceparams[],
2191            const rvec      x[],
2192            rvec4           f[],
2193            rvec            fshift[],
2194            const t_pbc*    pbc,
2195            const t_graph*  g,
2196            real            lambda,
2197            real*           dvdlambda,
2198            const t_mdatoms gmx_unused* md,
2199            t_fcdata gmx_unused* fcd,
2200            int gmx_unused* global_atom_index)
2201 {
2202     int  i, type, ai, aj, ak, al;
2203     int  t1, t2, t3;
2204     real phi, phi0, dphi0, ddphi, vtot;
2205     rvec r_ij, r_kj, r_kl, m, n;
2206     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2207
2208     L1        = 1.0 - lambda;
2209     dvdl_term = 0;
2210     vtot      = 0.0;
2211     for (i = 0; (i < nbonds);)
2212     {
2213         type = forceatoms[i++];
2214         ai   = forceatoms[i++];
2215         aj   = forceatoms[i++];
2216         ak   = forceatoms[i++];
2217         al   = forceatoms[i++];
2218
2219         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2220
2221         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2222          * force changes if we just apply a normal harmonic.
2223          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2224          * This means we will never have the periodicity problem, unless
2225          * the dihedral is Pi away from phiO, which is very unlikely due to
2226          * the potential.
2227          */
2228         kA = forceparams[type].harmonic.krA;
2229         kB = forceparams[type].harmonic.krB;
2230         pA = forceparams[type].harmonic.rA;
2231         pB = forceparams[type].harmonic.rB;
2232
2233         kk    = L1 * kA + lambda * kB;
2234         phi0  = (L1 * pA + lambda * pB) * DEG2RAD;
2235         dphi0 = (pB - pA) * DEG2RAD;
2236
2237         dp = phi - phi0;
2238
2239         make_dp_periodic(&dp);
2240
2241         dp2 = dp * dp;
2242
2243         vtot += 0.5 * kk * dp2;
2244         ddphi = -kk * dp;
2245
2246         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2247
2248         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
2249                            t2, t3); /* 112              */
2250         /* 218 TOTAL    */
2251     }
2252
2253     *dvdlambda += dvdl_term;
2254     return vtot;
2255 }
2256
2257 /*! \brief Computes angle restraints of two different types */
2258 template<BondedKernelFlavor flavor>
2259 real low_angres(int             nbonds,
2260                 const t_iatom   forceatoms[],
2261                 const t_iparams forceparams[],
2262                 const rvec      x[],
2263                 rvec4           f[],
2264                 rvec            fshift[],
2265                 const t_pbc*    pbc,
2266                 const t_graph*  g,
2267                 real            lambda,
2268                 real*           dvdlambda,
2269                 gmx_bool        bZAxis)
2270 {
2271     int  i, m, type, ai, aj, ak, al;
2272     int  t1, t2;
2273     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2274     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2275     real st, sth, nrij2, nrkl2, c, cij, ckl;
2276
2277     ivec dt;
2278     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2279
2280     vtot = 0.0;
2281     ak = al = 0; /* to avoid warnings */
2282     for (i = 0; i < nbonds;)
2283     {
2284         type = forceatoms[i++];
2285         ai   = forceatoms[i++];
2286         aj   = forceatoms[i++];
2287         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2288         if (!bZAxis)
2289         {
2290             ak = forceatoms[i++];
2291             al = forceatoms[i++];
2292             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2293         }
2294         else
2295         {
2296             r_kl[XX] = 0;
2297             r_kl[YY] = 0;
2298             r_kl[ZZ] = 1;
2299         }
2300
2301         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2302         phi     = std::acos(cos_phi);    /* 10           */
2303
2304         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
2305                                   forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
2306                                   forceparams[type].pdihs.mult, phi, lambda, &vid, &dVdphi); /*  40 */
2307
2308         vtot += vid;
2309
2310         cos_phi2 = gmx::square(cos_phi); /*   1         */
2311         if (cos_phi2 < 1)
2312         {
2313             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2314             sth   = st * cos_phi;                         /*   1                */
2315             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2316             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2317
2318             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2319             cij = sth / nrij2;                      /*  10              */
2320             ckl = sth / nrkl2;                      /*  10              */
2321
2322             for (m = 0; m < DIM; m++) /*  18+18       */
2323             {
2324                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2325                 f[ai][m] += f_i[m];
2326                 f[aj][m] -= f_i[m];
2327                 if (!bZAxis)
2328                 {
2329                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2330                     f[ak][m] += f_k[m];
2331                     f[al][m] -= f_k[m];
2332                 }
2333             }
2334
2335             if (computeVirial(flavor))
2336             {
2337                 if (g)
2338                 {
2339                     ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2340                     t1 = IVEC2IS(dt);
2341                 }
2342                 rvec_inc(fshift[t1], f_i);
2343                 rvec_dec(fshift[CENTRAL], f_i);
2344                 if (!bZAxis)
2345                 {
2346                     if (g)
2347                     {
2348                         ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2349                         t2 = IVEC2IS(dt);
2350                     }
2351                     rvec_inc(fshift[t2], f_k);
2352                     rvec_dec(fshift[CENTRAL], f_k);
2353                 }
2354             }
2355         }
2356     }
2357
2358     return vtot; /*  184 / 157 (bZAxis)  total  */
2359 }
2360
2361 template<BondedKernelFlavor flavor>
2362 real angres(int             nbonds,
2363             const t_iatom   forceatoms[],
2364             const t_iparams forceparams[],
2365             const rvec      x[],
2366             rvec4           f[],
2367             rvec            fshift[],
2368             const t_pbc*    pbc,
2369             const t_graph*  g,
2370             real            lambda,
2371             real*           dvdlambda,
2372             const t_mdatoms gmx_unused* md,
2373             t_fcdata gmx_unused* fcd,
2374             int gmx_unused* global_atom_index)
2375 {
2376     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
2377                               dvdlambda, FALSE);
2378 }
2379
2380 template<BondedKernelFlavor flavor>
2381 real angresz(int             nbonds,
2382              const t_iatom   forceatoms[],
2383              const t_iparams forceparams[],
2384              const rvec      x[],
2385              rvec4           f[],
2386              rvec            fshift[],
2387              const t_pbc*    pbc,
2388              const t_graph*  g,
2389              real            lambda,
2390              real*           dvdlambda,
2391              const t_mdatoms gmx_unused* md,
2392              t_fcdata gmx_unused* fcd,
2393              int gmx_unused* global_atom_index)
2394 {
2395     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
2396                               dvdlambda, TRUE);
2397 }
2398
2399 template<BondedKernelFlavor flavor>
2400 real dihres(int             nbonds,
2401             const t_iatom   forceatoms[],
2402             const t_iparams forceparams[],
2403             const rvec      x[],
2404             rvec4           f[],
2405             rvec            fshift[],
2406             const t_pbc*    pbc,
2407             const t_graph*  g,
2408             real            lambda,
2409             real*           dvdlambda,
2410             const t_mdatoms gmx_unused* md,
2411             t_fcdata gmx_unused* fcd,
2412             int gmx_unused* global_atom_index)
2413 {
2414     real vtot = 0;
2415     int  ai, aj, ak, al, i, type, t1, t2, t3;
2416     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2417     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2418     rvec r_ij, r_kj, r_kl, m, n;
2419
2420     L1 = 1.0 - lambda;
2421
2422     d2r = DEG2RAD;
2423
2424     for (i = 0; (i < nbonds);)
2425     {
2426         type = forceatoms[i++];
2427         ai   = forceatoms[i++];
2428         aj   = forceatoms[i++];
2429         ak   = forceatoms[i++];
2430         al   = forceatoms[i++];
2431
2432         phi0A = forceparams[type].dihres.phiA * d2r;
2433         dphiA = forceparams[type].dihres.dphiA * d2r;
2434         kfacA = forceparams[type].dihres.kfacA;
2435
2436         phi0B = forceparams[type].dihres.phiB * d2r;
2437         dphiB = forceparams[type].dihres.dphiB * d2r;
2438         kfacB = forceparams[type].dihres.kfacB;
2439
2440         phi0 = L1 * phi0A + lambda * phi0B;
2441         dphi = L1 * dphiA + lambda * dphiB;
2442         kfac = L1 * kfacA + lambda * kfacB;
2443
2444         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2445         /* 84 flops */
2446
2447         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2448          * force changes if we just apply a normal harmonic.
2449          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2450          * This means we will never have the periodicity problem, unless
2451          * the dihedral is Pi away from phiO, which is very unlikely due to
2452          * the potential.
2453          */
2454         dp = phi - phi0;
2455         make_dp_periodic(&dp);
2456
2457         if (dp > dphi)
2458         {
2459             ddp = dp - dphi;
2460         }
2461         else if (dp < -dphi)
2462         {
2463             ddp = dp + dphi;
2464         }
2465         else
2466         {
2467             ddp = 0;
2468         }
2469
2470         if (ddp != 0.0)
2471         {
2472             ddp2 = ddp * ddp;
2473             vtot += 0.5 * kfac * ddp2;
2474             ddphi = kfac * ddp;
2475
2476             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2477             /* lambda dependence from changing restraint distances */
2478             if (ddp > 0)
2479             {
2480                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2481             }
2482             else if (ddp < 0)
2483             {
2484                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2485             }
2486             do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
2487                                t1, t2, t3); /* 112              */
2488         }
2489     }
2490     return vtot;
2491 }
2492
2493
2494 real unimplemented(int gmx_unused nbonds,
2495                    const t_iatom gmx_unused forceatoms[],
2496                    const t_iparams gmx_unused forceparams[],
2497                    const rvec gmx_unused x[],
2498                    rvec4 gmx_unused f[],
2499                    rvec gmx_unused fshift[],
2500                    const t_pbc gmx_unused* pbc,
2501                    const t_graph gmx_unused* g,
2502                    real gmx_unused lambda,
2503                    real gmx_unused* dvdlambda,
2504                    const t_mdatoms gmx_unused* md,
2505                    t_fcdata gmx_unused* fcd,
2506                    int gmx_unused* global_atom_index)
2507 {
2508     gmx_impl("*** you are using a not implemented function");
2509 }
2510
2511 template<BondedKernelFlavor flavor>
2512 real restrangles(int             nbonds,
2513                  const t_iatom   forceatoms[],
2514                  const t_iparams forceparams[],
2515                  const rvec      x[],
2516                  rvec4           f[],
2517                  rvec            fshift[],
2518                  const t_pbc*    pbc,
2519                  const t_graph*  g,
2520                  real gmx_unused lambda,
2521                  real gmx_unused* dvdlambda,
2522                  const t_mdatoms gmx_unused* md,
2523                  t_fcdata gmx_unused* fcd,
2524                  int gmx_unused* global_atom_index)
2525 {
2526     int    i, d, ai, aj, ak, type, m;
2527     int    t1, t2;
2528     real   v, vtot;
2529     ivec   jt, dt_ij, dt_kj;
2530     rvec   f_i, f_j, f_k;
2531     double prefactor, ratio_ante, ratio_post;
2532     rvec   delta_ante, delta_post, vec_temp;
2533
2534     vtot = 0.0;
2535     for (i = 0; (i < nbonds);)
2536     {
2537         type = forceatoms[i++];
2538         ai   = forceatoms[i++];
2539         aj   = forceatoms[i++];
2540         ak   = forceatoms[i++];
2541
2542         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2543         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2544         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2545
2546
2547         /* This function computes factors needed for restricted angle potential.
2548          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2549          * when three particles align and the dihedral angle and dihedral potential
2550          * cannot be calculated. This potential is calculated using the formula:
2551            real restrangles(int nbonds,
2552             const t_iatom forceatoms[],const t_iparams forceparams[],
2553             const rvec x[],rvec4 f[],rvec fshift[],
2554             const t_pbc *pbc,const t_graph *g,
2555             real gmx_unused lambda,real gmx_unused *dvdlambda,
2556             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2557             int gmx_unused *global_atom_index)
2558            {
2559            int  i, d, ai, aj, ak, type, m;
2560            int t1, t2;
2561            rvec r_ij,r_kj;
2562            real v, vtot;
2563            ivec jt,dt_ij,dt_kj;
2564            rvec f_i, f_j, f_k;
2565            real prefactor, ratio_ante, ratio_post;
2566            rvec delta_ante, delta_post, vec_temp;
2567
2568            vtot = 0.0;
2569            for(i=0; (i<nbonds); )
2570            {
2571            type = forceatoms[i++];
2572            ai   = forceatoms[i++];
2573            aj   = forceatoms[i++];
2574            ak   = forceatoms[i++];
2575
2576          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2577          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2578          * For more explanations see comments file "restcbt.h". */
2579
2580         compute_factors_restangles(type, forceparams, delta_ante, delta_post, &prefactor,
2581                                    &ratio_ante, &ratio_post, &v);
2582
2583         /*   Forces are computed per component */
2584         for (d = 0; d < DIM; d++)
2585         {
2586             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2587             f_j[d] = prefactor
2588                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2589             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2590         }
2591
2592         /*   Computation of potential energy   */
2593
2594         vtot += v;
2595
2596         /*   Update forces */
2597
2598         for (m = 0; (m < DIM); m++)
2599         {
2600             f[ai][m] += f_i[m];
2601             f[aj][m] += f_j[m];
2602             f[ak][m] += f_k[m];
2603         }
2604
2605         if (computeVirial(flavor))
2606         {
2607             if (g)
2608             {
2609                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2610                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2611                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2612                 t1 = IVEC2IS(dt_ij);
2613                 t2 = IVEC2IS(dt_kj);
2614             }
2615
2616             rvec_inc(fshift[t1], f_i);
2617             rvec_inc(fshift[CENTRAL], f_j);
2618             rvec_inc(fshift[t2], f_k);
2619         }
2620     }
2621     return vtot;
2622 }
2623
2624
2625 template<BondedKernelFlavor flavor>
2626 real restrdihs(int             nbonds,
2627                const t_iatom   forceatoms[],
2628                const t_iparams forceparams[],
2629                const rvec      x[],
2630                rvec4           f[],
2631                rvec            fshift[],
2632                const t_pbc*    pbc,
2633                const t_graph*  g,
2634                real gmx_unused lambda,
2635                real gmx_unused* dvlambda,
2636                const t_mdatoms gmx_unused* md,
2637                t_fcdata gmx_unused* fcd,
2638                int gmx_unused* global_atom_index)
2639 {
2640     int  i, d, type, ai, aj, ak, al;
2641     rvec f_i, f_j, f_k, f_l;
2642     rvec dx_jl;
2643     ivec jt, dt_ij, dt_kj, dt_lj;
2644     int  t1, t2, t3;
2645     real v, vtot;
2646     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2647     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2648     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2649     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2650     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2651     real prefactor_phi;
2652
2653
2654     vtot = 0.0;
2655     for (i = 0; (i < nbonds);)
2656     {
2657         type = forceatoms[i++];
2658         ai   = forceatoms[i++];
2659         aj   = forceatoms[i++];
2660         ak   = forceatoms[i++];
2661         al   = forceatoms[i++];
2662
2663         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2664         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2665         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2666         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2667         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2668
2669         /* This function computes factors needed for restricted angle potential.
2670          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2671          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2672          * This potential is calculated using the formula:
2673          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2674          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2675          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2676          * For more explanations see comments file "restcbt.h" */
2677
2678         compute_factors_restrdihs(
2679                 type, forceparams, delta_ante, delta_crnt, delta_post, &factor_phi_ai_ante,
2680                 &factor_phi_ai_crnt, &factor_phi_ai_post, &factor_phi_aj_ante, &factor_phi_aj_crnt,
2681                 &factor_phi_aj_post, &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2682                 &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post, &prefactor_phi, &v);
2683
2684
2685         /*      Computation of forces per component */
2686         for (d = 0; d < DIM; d++)
2687         {
2688             f_i[d] = prefactor_phi
2689                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2690                         + factor_phi_ai_post * delta_post[d]);
2691             f_j[d] = prefactor_phi
2692                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2693                         + factor_phi_aj_post * delta_post[d]);
2694             f_k[d] = prefactor_phi
2695                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2696                         + factor_phi_ak_post * delta_post[d]);
2697             f_l[d] = prefactor_phi
2698                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2699                         + factor_phi_al_post * delta_post[d]);
2700         }
2701         /*      Computation of the energy */
2702
2703         vtot += v;
2704
2705
2706         /*    Updating the forces */
2707
2708         rvec_inc(f[ai], f_i);
2709         rvec_inc(f[aj], f_j);
2710         rvec_inc(f[ak], f_k);
2711         rvec_inc(f[al], f_l);
2712
2713         if (computeVirial(flavor))
2714         {
2715             if (g)
2716             {
2717                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2718                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2719                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2720                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2721                 t1 = IVEC2IS(dt_ij);
2722                 t2 = IVEC2IS(dt_kj);
2723                 t3 = IVEC2IS(dt_lj);
2724             }
2725             else if (pbc)
2726             {
2727                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2728             }
2729             else
2730             {
2731                 t3 = CENTRAL;
2732             }
2733
2734             rvec_inc(fshift[t1], f_i);
2735             rvec_inc(fshift[CENTRAL], f_j);
2736             rvec_inc(fshift[t2], f_k);
2737             rvec_inc(fshift[t3], f_l);
2738         }
2739     }
2740
2741     return vtot;
2742 }
2743
2744
2745 template<BondedKernelFlavor flavor>
2746 real cbtdihs(int             nbonds,
2747              const t_iatom   forceatoms[],
2748              const t_iparams forceparams[],
2749              const rvec      x[],
2750              rvec4           f[],
2751              rvec            fshift[],
2752              const t_pbc*    pbc,
2753              const t_graph*  g,
2754              real gmx_unused lambda,
2755              real gmx_unused* dvdlambda,
2756              const t_mdatoms gmx_unused* md,
2757              t_fcdata gmx_unused* fcd,
2758              int gmx_unused* global_atom_index)
2759 {
2760     int  type, ai, aj, ak, al, i, d;
2761     int  t1, t2, t3;
2762     real v, vtot;
2763     rvec vec_temp;
2764     rvec f_i, f_j, f_k, f_l;
2765     ivec jt, dt_ij, dt_kj, dt_lj;
2766     rvec dx_jl;
2767     rvec delta_ante, delta_crnt, delta_post;
2768     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2769     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2770     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2771
2772
2773     vtot = 0.0;
2774     for (i = 0; (i < nbonds);)
2775     {
2776         type = forceatoms[i++];
2777         ai   = forceatoms[i++];
2778         aj   = forceatoms[i++];
2779         ak   = forceatoms[i++];
2780         al   = forceatoms[i++];
2781
2782
2783         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2784         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2785         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2786         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2787         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2788         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2789
2790         /* \brief Compute factors for CBT potential
2791          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2792          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2793          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2794          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2795          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2796          * It contains in its expression not only the dihedral angle \f$\phi\f$
2797          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2798          * --- the adjacent bending angles.
2799          * For more explanations see comments file "restcbt.h". */
2800
2801         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post, f_phi_ai,
2802                                 f_phi_aj, f_phi_ak, f_phi_al, f_theta_ante_ai, f_theta_ante_aj,
2803                                 f_theta_ante_ak, f_theta_post_aj, f_theta_post_ak, f_theta_post_al, &v);
2804
2805
2806         /*      Acumulate the resuts per beads */
2807         for (d = 0; d < DIM; d++)
2808         {
2809             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2810             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2811             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2812             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2813         }
2814
2815         /*      Compute the potential energy */
2816
2817         vtot += v;
2818
2819
2820         /*  Updating the forces */
2821         rvec_inc(f[ai], f_i);
2822         rvec_inc(f[aj], f_j);
2823         rvec_inc(f[ak], f_k);
2824         rvec_inc(f[al], f_l);
2825
2826
2827         if (computeVirial(flavor))
2828         {
2829             if (g)
2830             {
2831                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2832                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2833                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2834                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2835                 t1 = IVEC2IS(dt_ij);
2836                 t2 = IVEC2IS(dt_kj);
2837                 t3 = IVEC2IS(dt_lj);
2838             }
2839             else if (pbc)
2840             {
2841                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2842             }
2843             else
2844             {
2845                 t3 = CENTRAL;
2846             }
2847
2848             rvec_inc(fshift[t1], f_i);
2849             rvec_inc(fshift[CENTRAL], f_j);
2850             rvec_inc(fshift[t2], f_k);
2851             rvec_inc(fshift[t3], f_l);
2852         }
2853     }
2854
2855     return vtot;
2856 }
2857
2858 template<BondedKernelFlavor flavor>
2859 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2860 rbdihs(int             nbonds,
2861        const t_iatom   forceatoms[],
2862        const t_iparams forceparams[],
2863        const rvec      x[],
2864        rvec4           f[],
2865        rvec            fshift[],
2866        const t_pbc*    pbc,
2867        const t_graph*  g,
2868        real            lambda,
2869        real*           dvdlambda,
2870        const t_mdatoms gmx_unused* md,
2871        t_fcdata gmx_unused* fcd,
2872        int gmx_unused* global_atom_index)
2873 {
2874     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2875     int        type, ai, aj, ak, al, i, j;
2876     int        t1, t2, t3;
2877     rvec       r_ij, r_kj, r_kl, m, n;
2878     real       parmA[NR_RBDIHS];
2879     real       parmB[NR_RBDIHS];
2880     real       parm[NR_RBDIHS];
2881     real       cos_phi, phi, rbp, rbpBA;
2882     real       v, ddphi, sin_phi;
2883     real       cosfac, vtot;
2884     real       L1        = 1.0 - lambda;
2885     real       dvdl_term = 0;
2886
2887     vtot = 0.0;
2888     for (i = 0; (i < nbonds);)
2889     {
2890         type = forceatoms[i++];
2891         ai   = forceatoms[i++];
2892         aj   = forceatoms[i++];
2893         ak   = forceatoms[i++];
2894         al   = forceatoms[i++];
2895
2896         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2897
2898         /* Change to polymer convention */
2899         if (phi < c0)
2900         {
2901             phi += M_PI;
2902         }
2903         else
2904         {
2905             phi -= M_PI; /*   1         */
2906         }
2907         cos_phi = std::cos(phi);
2908         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2909         sin_phi = std::sin(phi);
2910
2911         for (j = 0; (j < NR_RBDIHS); j++)
2912         {
2913             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2914             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2915             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2916         }
2917         /* Calculate cosine powers */
2918         /* Calculate the energy */
2919         /* Calculate the derivative */
2920
2921         v = parm[0];
2922         dvdl_term += (parmB[0] - parmA[0]);
2923         ddphi  = c0;
2924         cosfac = c1;
2925
2926         rbp   = parm[1];
2927         rbpBA = parmB[1] - parmA[1];
2928         ddphi += rbp * cosfac;
2929         cosfac *= cos_phi;
2930         v += cosfac * rbp;
2931         dvdl_term += cosfac * rbpBA;
2932         rbp   = parm[2];
2933         rbpBA = parmB[2] - parmA[2];
2934         ddphi += c2 * rbp * cosfac;
2935         cosfac *= cos_phi;
2936         v += cosfac * rbp;
2937         dvdl_term += cosfac * rbpBA;
2938         rbp   = parm[3];
2939         rbpBA = parmB[3] - parmA[3];
2940         ddphi += c3 * rbp * cosfac;
2941         cosfac *= cos_phi;
2942         v += cosfac * rbp;
2943         dvdl_term += cosfac * rbpBA;
2944         rbp   = parm[4];
2945         rbpBA = parmB[4] - parmA[4];
2946         ddphi += c4 * rbp * cosfac;
2947         cosfac *= cos_phi;
2948         v += cosfac * rbp;
2949         dvdl_term += cosfac * rbpBA;
2950         rbp   = parm[5];
2951         rbpBA = parmB[5] - parmA[5];
2952         ddphi += c5 * rbp * cosfac;
2953         cosfac *= cos_phi;
2954         v += cosfac * rbp;
2955         dvdl_term += cosfac * rbpBA;
2956
2957         ddphi = -ddphi * sin_phi; /*  11                */
2958
2959         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
2960                            t2, t3); /* 112              */
2961         vtot += v;
2962     }
2963     *dvdlambda += dvdl_term;
2964
2965     return vtot;
2966 }
2967
2968 //! \endcond
2969
2970 /*! \brief Mysterious undocumented function */
2971 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
2972 {
2973     int im1, ip1, ip2;
2974
2975     if (ip < 0)
2976     {
2977         ip = ip + grid_spacing - 1;
2978     }
2979     else if (ip > grid_spacing)
2980     {
2981         ip = ip - grid_spacing - 1;
2982     }
2983
2984     im1 = ip - 1;
2985     ip1 = ip + 1;
2986     ip2 = ip + 2;
2987
2988     if (ip == 0)
2989     {
2990         im1 = grid_spacing - 1;
2991     }
2992     else if (ip == grid_spacing - 2)
2993     {
2994         ip2 = 0;
2995     }
2996     else if (ip == grid_spacing - 1)
2997     {
2998         ip1 = 0;
2999         ip2 = 1;
3000     }
3001
3002     *ipm1 = im1;
3003     *ipp1 = ip1;
3004     *ipp2 = ip2;
3005
3006     return ip;
3007 }
3008
3009 } // namespace
3010
3011 real cmap_dihs(int                   nbonds,
3012                const t_iatom         forceatoms[],
3013                const t_iparams       forceparams[],
3014                const gmx_cmap_t*     cmap_grid,
3015                const rvec            x[],
3016                rvec4                 f[],
3017                rvec                  fshift[],
3018                const struct t_pbc*   pbc,
3019                const struct t_graph* g,
3020                real gmx_unused lambda,
3021                real gmx_unused* dvdlambda,
3022                const t_mdatoms gmx_unused* md,
3023                t_fcdata gmx_unused* fcd,
3024                int gmx_unused* global_atom_index)
3025 {
3026     int i, n;
3027     int ai, aj, ak, al, am;
3028     int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
3029     int type;
3030     int t11, t21, t31, t12, t22, t32;
3031     int iphi1, ip1m1, ip1p1, ip1p2;
3032     int iphi2, ip2m1, ip2p1, ip2p2;
3033     int l1, l2, l3;
3034     int pos1, pos2, pos3, pos4;
3035
3036     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
3037     real phi1, cos_phi1, sin_phi1, xphi1;
3038     real phi2, cos_phi2, sin_phi2, xphi2;
3039     real dx, tt, tu, e, df1, df2, vtot;
3040     real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
3041     real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
3042     real fg1, hg1, fga1, hgb1, gaa1, gbb1;
3043     real fg2, hg2, fga2, hgb2, gaa2, gbb2;
3044     real fac;
3045
3046     rvec r1_ij, r1_kj, r1_kl, m1, n1;
3047     rvec r2_ij, r2_kj, r2_kl, m2, n2;
3048     rvec f1_i, f1_j, f1_k, f1_l;
3049     rvec f2_i, f2_j, f2_k, f2_l;
3050     rvec a1, b1, a2, b2;
3051     rvec f1, g1, h1, f2, g2, h2;
3052     rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
3053     ivec jt1, dt1_ij, dt1_kj, dt1_lj;
3054     ivec jt2, dt2_ij, dt2_kj, dt2_lj;
3055
3056     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
3057
3058     /* Total CMAP energy */
3059     vtot = 0;
3060
3061     for (n = 0; n < nbonds;)
3062     {
3063         /* Five atoms are involved in the two torsions */
3064         type = forceatoms[n++];
3065         ai   = forceatoms[n++];
3066         aj   = forceatoms[n++];
3067         ak   = forceatoms[n++];
3068         al   = forceatoms[n++];
3069         am   = forceatoms[n++];
3070
3071         /* Which CMAP type is this */
3072         const int   cmapA = forceparams[type].cmap.cmapA;
3073         const real* cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
3074
3075         /* First torsion */
3076         a1i = ai;
3077         a1j = aj;
3078         a1k = ak;
3079         a1l = al;
3080
3081         phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11,
3082                          &t21, &t31); /* 84 */
3083
3084         cos_phi1 = std::cos(phi1);
3085
3086         a1[0] = r1_ij[1] * r1_kj[2] - r1_ij[2] * r1_kj[1];
3087         a1[1] = r1_ij[2] * r1_kj[0] - r1_ij[0] * r1_kj[2];
3088         a1[2] = r1_ij[0] * r1_kj[1] - r1_ij[1] * r1_kj[0]; /* 9 */
3089
3090         b1[0] = r1_kl[1] * r1_kj[2] - r1_kl[2] * r1_kj[1];
3091         b1[1] = r1_kl[2] * r1_kj[0] - r1_kl[0] * r1_kj[2];
3092         b1[2] = r1_kl[0] * r1_kj[1] - r1_kl[1] * r1_kj[0]; /* 9 */
3093
3094         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3095
3096         ra21 = iprod(a1, a1);       /* 5 */
3097         rb21 = iprod(b1, b1);       /* 5 */
3098         rg21 = iprod(r1_kj, r1_kj); /* 5 */
3099         rg1  = sqrt(rg21);
3100
3101         rgr1  = 1.0 / rg1;
3102         ra2r1 = 1.0 / ra21;
3103         rb2r1 = 1.0 / rb21;
3104         rabr1 = sqrt(ra2r1 * rb2r1);
3105
3106         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3107
3108         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3109         {
3110             phi1 = std::asin(sin_phi1);
3111
3112             if (cos_phi1 < 0)
3113             {
3114                 if (phi1 > 0)
3115                 {
3116                     phi1 = M_PI - phi1;
3117                 }
3118                 else
3119                 {
3120                     phi1 = -M_PI - phi1;
3121                 }
3122             }
3123         }
3124         else
3125         {
3126             phi1 = std::acos(cos_phi1);
3127
3128             if (sin_phi1 < 0)
3129             {
3130                 phi1 = -phi1;
3131             }
3132         }
3133
3134         xphi1 = phi1 + M_PI; /* 1 */
3135
3136         /* Second torsion */
3137         a2i = aj;
3138         a2j = ak;
3139         a2k = al;
3140         a2l = am;
3141
3142         phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12,
3143                          &t22, &t32); /* 84 */
3144
3145         cos_phi2 = std::cos(phi2);
3146
3147         a2[0] = r2_ij[1] * r2_kj[2] - r2_ij[2] * r2_kj[1];
3148         a2[1] = r2_ij[2] * r2_kj[0] - r2_ij[0] * r2_kj[2];
3149         a2[2] = r2_ij[0] * r2_kj[1] - r2_ij[1] * r2_kj[0]; /* 9 */
3150
3151         b2[0] = r2_kl[1] * r2_kj[2] - r2_kl[2] * r2_kj[1];
3152         b2[1] = r2_kl[2] * r2_kj[0] - r2_kl[0] * r2_kj[2];
3153         b2[2] = r2_kl[0] * r2_kj[1] - r2_kl[1] * r2_kj[0]; /* 9 */
3154
3155         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3156
3157         ra22 = iprod(a2, a2);       /* 5 */
3158         rb22 = iprod(b2, b2);       /* 5 */
3159         rg22 = iprod(r2_kj, r2_kj); /* 5 */
3160         rg2  = sqrt(rg22);
3161
3162         rgr2  = 1.0 / rg2;
3163         ra2r2 = 1.0 / ra22;
3164         rb2r2 = 1.0 / rb22;
3165         rabr2 = sqrt(ra2r2 * rb2r2);
3166
3167         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3168
3169         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3170         {
3171             phi2 = std::asin(sin_phi2);
3172
3173             if (cos_phi2 < 0)
3174             {
3175                 if (phi2 > 0)
3176                 {
3177                     phi2 = M_PI - phi2;
3178                 }
3179                 else
3180                 {
3181                     phi2 = -M_PI - phi2;
3182                 }
3183             }
3184         }
3185         else
3186         {
3187             phi2 = std::acos(cos_phi2);
3188
3189             if (sin_phi2 < 0)
3190             {
3191                 phi2 = -phi2;
3192             }
3193         }
3194
3195         xphi2 = phi2 + M_PI; /* 1 */
3196
3197         /* Range mangling */
3198         if (xphi1 < 0)
3199         {
3200             xphi1 = xphi1 + 2 * M_PI;
3201         }
3202         else if (xphi1 >= 2 * M_PI)
3203         {
3204             xphi1 = xphi1 - 2 * M_PI;
3205         }
3206
3207         if (xphi2 < 0)
3208         {
3209             xphi2 = xphi2 + 2 * M_PI;
3210         }
3211         else if (xphi2 >= 2 * M_PI)
3212         {
3213             xphi2 = xphi2 - 2 * M_PI;
3214         }
3215
3216         /* Number of grid points */
3217         dx = 2 * M_PI / cmap_grid->grid_spacing;
3218
3219         /* Where on the grid are we */
3220         iphi1 = static_cast<int>(xphi1 / dx);
3221         iphi2 = static_cast<int>(xphi2 / dx);
3222
3223         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3224         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3225
3226         pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3227         pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3228         pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3229         pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3230
3231         ty[0] = cmapd[pos1 * 4];
3232         ty[1] = cmapd[pos2 * 4];
3233         ty[2] = cmapd[pos3 * 4];
3234         ty[3] = cmapd[pos4 * 4];
3235
3236         ty1[0] = cmapd[pos1 * 4 + 1];
3237         ty1[1] = cmapd[pos2 * 4 + 1];
3238         ty1[2] = cmapd[pos3 * 4 + 1];
3239         ty1[3] = cmapd[pos4 * 4 + 1];
3240
3241         ty2[0] = cmapd[pos1 * 4 + 2];
3242         ty2[1] = cmapd[pos2 * 4 + 2];
3243         ty2[2] = cmapd[pos3 * 4 + 2];
3244         ty2[3] = cmapd[pos4 * 4 + 2];
3245
3246         ty12[0] = cmapd[pos1 * 4 + 3];
3247         ty12[1] = cmapd[pos2 * 4 + 3];
3248         ty12[2] = cmapd[pos3 * 4 + 3];
3249         ty12[3] = cmapd[pos4 * 4 + 3];
3250
3251         /* Switch to degrees */
3252         dx    = 360.0 / cmap_grid->grid_spacing;
3253         xphi1 = xphi1 * RAD2DEG;
3254         xphi2 = xphi2 * RAD2DEG;
3255
3256         for (i = 0; i < 4; i++) /* 16 */
3257         {
3258             tx[i]      = ty[i];
3259             tx[i + 4]  = ty1[i] * dx;
3260             tx[i + 8]  = ty2[i] * dx;
3261             tx[i + 12] = ty12[i] * dx * dx;
3262         }
3263
3264         real tc[16] = { 0 };
3265         for (int idx = 0; idx < 16; idx++) /* 1056 */
3266         {
3267             for (int k = 0; k < 16; k++)
3268             {
3269                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3270             }
3271         }
3272
3273         tt = (xphi1 - iphi1 * dx) / dx;
3274         tu = (xphi2 - iphi2 * dx) / dx;
3275
3276         e   = 0;
3277         df1 = 0;
3278         df2 = 0;
3279
3280         for (i = 3; i >= 0; i--)
3281         {
3282             l1 = loop_index[i][3];
3283             l2 = loop_index[i][2];
3284             l3 = loop_index[i][1];
3285
3286             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3287             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3288             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3289         }
3290
3291         fac = RAD2DEG / dx;
3292         df1 = df1 * fac;
3293         df2 = df2 * fac;
3294
3295         /* CMAP energy */
3296         vtot += e;
3297
3298         /* Do forces - first torsion */
3299         fg1  = iprod(r1_ij, r1_kj);
3300         hg1  = iprod(r1_kl, r1_kj);
3301         fga1 = fg1 * ra2r1 * rgr1;
3302         hgb1 = hg1 * rb2r1 * rgr1;
3303         gaa1 = -ra2r1 * rg1;
3304         gbb1 = rb2r1 * rg1;
3305
3306         for (i = 0; i < DIM; i++)
3307         {
3308             dtf1[i] = gaa1 * a1[i];
3309             dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3310             dth1[i] = gbb1 * b1[i];
3311
3312             f1[i] = df1 * dtf1[i];
3313             g1[i] = df1 * dtg1[i];
3314             h1[i] = df1 * dth1[i];
3315
3316             f1_i[i] = f1[i];
3317             f1_j[i] = -f1[i] - g1[i];
3318             f1_k[i] = h1[i] + g1[i];
3319             f1_l[i] = -h1[i];
3320
3321             f[a1i][i] = f[a1i][i] + f1_i[i];
3322             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3323             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3324             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3325         }
3326
3327         /* Do forces - second torsion */
3328         fg2  = iprod(r2_ij, r2_kj);
3329         hg2  = iprod(r2_kl, r2_kj);
3330         fga2 = fg2 * ra2r2 * rgr2;
3331         hgb2 = hg2 * rb2r2 * rgr2;
3332         gaa2 = -ra2r2 * rg2;
3333         gbb2 = rb2r2 * rg2;
3334
3335         for (i = 0; i < DIM; i++)
3336         {
3337             dtf2[i] = gaa2 * a2[i];
3338             dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3339             dth2[i] = gbb2 * b2[i];
3340
3341             f2[i] = df2 * dtf2[i];
3342             g2[i] = df2 * dtg2[i];
3343             h2[i] = df2 * dth2[i];
3344
3345             f2_i[i] = f2[i];
3346             f2_j[i] = -f2[i] - g2[i];
3347             f2_k[i] = h2[i] + g2[i];
3348             f2_l[i] = -h2[i];
3349
3350             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3351             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3352             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3353             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3354         }
3355
3356         /* Shift forces */
3357         if (fshift != nullptr)
3358         {
3359             if (g)
3360             {
3361                 copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3362                 ivec_sub(SHIFT_IVEC(g, a1i), jt1, dt1_ij);
3363                 ivec_sub(SHIFT_IVEC(g, a1k), jt1, dt1_kj);
3364                 ivec_sub(SHIFT_IVEC(g, a1l), jt1, dt1_lj);
3365                 t11 = IVEC2IS(dt1_ij);
3366                 t21 = IVEC2IS(dt1_kj);
3367                 t31 = IVEC2IS(dt1_lj);
3368
3369                 copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3370                 ivec_sub(SHIFT_IVEC(g, a2i), jt2, dt2_ij);
3371                 ivec_sub(SHIFT_IVEC(g, a2k), jt2, dt2_kj);
3372                 ivec_sub(SHIFT_IVEC(g, a2l), jt2, dt2_lj);
3373                 t12 = IVEC2IS(dt2_ij);
3374                 t22 = IVEC2IS(dt2_kj);
3375                 t32 = IVEC2IS(dt2_lj);
3376             }
3377             else if (pbc)
3378             {
3379                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3380                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3381             }
3382             else
3383             {
3384                 t31 = CENTRAL;
3385                 t32 = CENTRAL;
3386             }
3387
3388             rvec_inc(fshift[t11], f1_i);
3389             rvec_inc(fshift[CENTRAL], f1_j);
3390             rvec_inc(fshift[t21], f1_k);
3391             rvec_inc(fshift[t31], f1_l);
3392
3393             rvec_inc(fshift[t12], f2_i);
3394             rvec_inc(fshift[CENTRAL], f2_j);
3395             rvec_inc(fshift[t22], f2_k);
3396             rvec_inc(fshift[t32], f2_l);
3397         }
3398     }
3399     return vtot;
3400 }
3401
3402 namespace
3403 {
3404
3405 //! \cond
3406 /***********************************************************
3407  *
3408  *   G R O M O S  9 6   F U N C T I O N S
3409  *
3410  ***********************************************************/
3411 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3412 {
3413     const real half = 0.5;
3414     real       L1, kk, x0, dx, dx2;
3415     real       v, f, dvdlambda;
3416
3417     L1 = 1.0 - lambda;
3418     kk = L1 * kA + lambda * kB;
3419     x0 = L1 * xA + lambda * xB;
3420
3421     dx  = x - x0;
3422     dx2 = dx * dx;
3423
3424     f         = -kk * dx;
3425     v         = half * kk * dx2;
3426     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3427
3428     *F = f;
3429     *V = v;
3430
3431     return dvdlambda;
3432
3433     /* That was 21 flops */
3434 }
3435
3436 template<BondedKernelFlavor flavor>
3437 real g96bonds(int             nbonds,
3438               const t_iatom   forceatoms[],
3439               const t_iparams forceparams[],
3440               const rvec      x[],
3441               rvec4           f[],
3442               rvec            fshift[],
3443               const t_pbc*    pbc,
3444               const t_graph*  g,
3445               real            lambda,
3446               real*           dvdlambda,
3447               const t_mdatoms gmx_unused* md,
3448               t_fcdata gmx_unused* fcd,
3449               int gmx_unused* global_atom_index)
3450 {
3451     int  i, ki, ai, aj, type;
3452     real dr2, fbond, vbond, vtot;
3453     rvec dx;
3454
3455     vtot = 0.0;
3456     for (i = 0; (i < nbonds);)
3457     {
3458         type = forceatoms[i++];
3459         ai   = forceatoms[i++];
3460         aj   = forceatoms[i++];
3461
3462         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3463         dr2 = iprod(dx, dx);                       /*   5               */
3464
3465         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3466                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr2,
3467                                   lambda, &vbond, &fbond);
3468
3469         vtot += 0.5 * vbond; /* 1*/
3470
3471         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3472     }                                                                  /* 44 TOTAL      */
3473     return vtot;
3474 }
3475
3476 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3477 /* Return value is the angle between the bonds i-j and j-k */
3478 {
3479     real costh;
3480
3481     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3482     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3483
3484     costh = cos_angle(r_ij, r_kj); /* 25                */
3485     /* 41 TOTAL */
3486     return costh;
3487 }
3488
3489 template<BondedKernelFlavor flavor>
3490 real g96angles(int             nbonds,
3491                const t_iatom   forceatoms[],
3492                const t_iparams forceparams[],
3493                const rvec      x[],
3494                rvec4           f[],
3495                rvec            fshift[],
3496                const t_pbc*    pbc,
3497                const t_graph*  g,
3498                real            lambda,
3499                real*           dvdlambda,
3500                const t_mdatoms gmx_unused* md,
3501                t_fcdata gmx_unused* fcd,
3502                int gmx_unused* global_atom_index)
3503 {
3504     int  i, ai, aj, ak, type, m, t1, t2;
3505     rvec r_ij, r_kj;
3506     real cos_theta, dVdt, va, vtot;
3507     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3508     rvec f_i, f_j, f_k;
3509     ivec jt, dt_ij, dt_kj;
3510
3511     vtot = 0.0;
3512     for (i = 0; (i < nbonds);)
3513     {
3514         type = forceatoms[i++];
3515         ai   = forceatoms[i++];
3516         aj   = forceatoms[i++];
3517         ak   = forceatoms[i++];
3518
3519         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3520
3521         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3522                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB,
3523                                   cos_theta, lambda, &va, &dVdt);
3524         vtot += va;
3525
3526         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3527         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3528         rij_2    = rij_1 * rij_1;
3529         rkj_2    = rkj_1 * rkj_1;
3530         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3531
3532         for (m = 0; (m < DIM); m++) /*  42      */
3533         {
3534             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3535             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3536             f_j[m] = -f_i[m] - f_k[m];
3537             f[ai][m] += f_i[m];
3538             f[aj][m] += f_j[m];
3539             f[ak][m] += f_k[m];
3540         }
3541
3542         if (computeVirial(flavor))
3543         {
3544             if (g)
3545             {
3546                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3547
3548                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3549                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3550                 t1 = IVEC2IS(dt_ij);
3551                 t2 = IVEC2IS(dt_kj);
3552             }
3553             rvec_inc(fshift[t1], f_i);
3554             rvec_inc(fshift[CENTRAL], f_j);
3555             rvec_inc(fshift[t2], f_k); /* 9 */
3556         }
3557         /* 163 TOTAL    */
3558     }
3559     return vtot;
3560 }
3561
3562 template<BondedKernelFlavor flavor>
3563 real cross_bond_bond(int             nbonds,
3564                      const t_iatom   forceatoms[],
3565                      const t_iparams forceparams[],
3566                      const rvec      x[],
3567                      rvec4           f[],
3568                      rvec            fshift[],
3569                      const t_pbc*    pbc,
3570                      const t_graph*  g,
3571                      real gmx_unused lambda,
3572                      real gmx_unused* dvdlambda,
3573                      const t_mdatoms gmx_unused* md,
3574                      t_fcdata gmx_unused* fcd,
3575                      int gmx_unused* global_atom_index)
3576 {
3577     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3578      * pp. 842-847
3579      */
3580     int  i, ai, aj, ak, type, m, t1, t2;
3581     rvec r_ij, r_kj;
3582     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3583     rvec f_i, f_j, f_k;
3584     ivec jt, dt_ij, dt_kj;
3585
3586     vtot = 0.0;
3587     for (i = 0; (i < nbonds);)
3588     {
3589         type = forceatoms[i++];
3590         ai   = forceatoms[i++];
3591         aj   = forceatoms[i++];
3592         ak   = forceatoms[i++];
3593         r1e  = forceparams[type].cross_bb.r1e;
3594         r2e  = forceparams[type].cross_bb.r2e;
3595         krr  = forceparams[type].cross_bb.krr;
3596
3597         /* Compute distance vectors ... */
3598         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3599         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3600
3601         /* ... and their lengths */
3602         r1 = norm(r_ij);
3603         r2 = norm(r_kj);
3604
3605         /* Deviations from ideality */
3606         s1 = r1 - r1e;
3607         s2 = r2 - r2e;
3608
3609         /* Energy (can be negative!) */
3610         vrr = krr * s1 * s2;
3611         vtot += vrr;
3612
3613         /* Forces */
3614         svmul(-krr * s2 / r1, r_ij, f_i);
3615         svmul(-krr * s1 / r2, r_kj, f_k);
3616
3617         for (m = 0; (m < DIM); m++) /*  12      */
3618         {
3619             f_j[m] = -f_i[m] - f_k[m];
3620             f[ai][m] += f_i[m];
3621             f[aj][m] += f_j[m];
3622             f[ak][m] += f_k[m];
3623         }
3624
3625         if (computeVirial(flavor))
3626         {
3627             if (g)
3628             {
3629                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3630
3631                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3632                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3633                 t1 = IVEC2IS(dt_ij);
3634                 t2 = IVEC2IS(dt_kj);
3635             }
3636             rvec_inc(fshift[t1], f_i);
3637             rvec_inc(fshift[CENTRAL], f_j);
3638             rvec_inc(fshift[t2], f_k); /* 9 */
3639         }
3640         /* 163 TOTAL    */
3641     }
3642     return vtot;
3643 }
3644
3645 template<BondedKernelFlavor flavor>
3646 real cross_bond_angle(int             nbonds,
3647                       const t_iatom   forceatoms[],
3648                       const t_iparams forceparams[],
3649                       const rvec      x[],
3650                       rvec4           f[],
3651                       rvec            fshift[],
3652                       const t_pbc*    pbc,
3653                       const t_graph*  g,
3654                       real gmx_unused lambda,
3655                       real gmx_unused* dvdlambda,
3656                       const t_mdatoms gmx_unused* md,
3657                       t_fcdata gmx_unused* fcd,
3658                       int gmx_unused* global_atom_index)
3659 {
3660     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3661      * pp. 842-847
3662      */
3663     int  i, ai, aj, ak, type, m, t1, t2;
3664     rvec r_ij, r_kj, r_ik;
3665     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3666     rvec f_i, f_j, f_k;
3667     ivec jt, dt_ij, dt_kj;
3668
3669     vtot = 0.0;
3670     for (i = 0; (i < nbonds);)
3671     {
3672         type = forceatoms[i++];
3673         ai   = forceatoms[i++];
3674         aj   = forceatoms[i++];
3675         ak   = forceatoms[i++];
3676         r1e  = forceparams[type].cross_ba.r1e;
3677         r2e  = forceparams[type].cross_ba.r2e;
3678         r3e  = forceparams[type].cross_ba.r3e;
3679         krt  = forceparams[type].cross_ba.krt;
3680
3681         /* Compute distance vectors ... */
3682         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3683         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3684         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3685
3686         /* ... and their lengths */
3687         r1 = norm(r_ij);
3688         r2 = norm(r_kj);
3689         r3 = norm(r_ik);
3690
3691         /* Deviations from ideality */
3692         s1 = r1 - r1e;
3693         s2 = r2 - r2e;
3694         s3 = r3 - r3e;
3695
3696         /* Energy (can be negative!) */
3697         vrt = krt * s3 * (s1 + s2);
3698         vtot += vrt;
3699
3700         /* Forces */
3701         k1 = -krt * (s3 / r1);
3702         k2 = -krt * (s3 / r2);
3703         k3 = -krt * (s1 + s2) / r3;
3704         for (m = 0; (m < DIM); m++)
3705         {
3706             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3707             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3708             f_j[m] = -f_i[m] - f_k[m];
3709         }
3710
3711         for (m = 0; (m < DIM); m++) /*  12      */
3712         {
3713             f[ai][m] += f_i[m];
3714             f[aj][m] += f_j[m];
3715             f[ak][m] += f_k[m];
3716         }
3717
3718         if (computeVirial(flavor))
3719         {
3720             if (g)
3721             {
3722                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3723
3724                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3725                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3726                 t1 = IVEC2IS(dt_ij);
3727                 t2 = IVEC2IS(dt_kj);
3728             }
3729             rvec_inc(fshift[t1], f_i);
3730             rvec_inc(fshift[CENTRAL], f_j);
3731             rvec_inc(fshift[t2], f_k); /* 9 */
3732         }
3733         /* 163 TOTAL    */
3734     }
3735     return vtot;
3736 }
3737
3738 /*! \brief Computes the potential and force for a tabulated potential */
3739 real bonded_tab(const char*          type,
3740                 int                  table_nr,
3741                 const bondedtable_t* table,
3742                 real                 kA,
3743                 real                 kB,
3744                 real                 r,
3745                 real                 lambda,
3746                 real*                V,
3747                 real*                F)
3748 {
3749     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3750     int  n0, nnn;
3751     real dvdlambda;
3752
3753     k = (1.0 - lambda) * kA + lambda * kB;
3754
3755     tabscale = table->scale;
3756     VFtab    = table->data;
3757
3758     rt = r * tabscale;
3759     n0 = static_cast<int>(rt);
3760     if (n0 >= table->n)
3761     {
3762         gmx_fatal(FARGS,
3763                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3764                   "between table indices %d and %d, table length %d",
3765                   type, table_nr, r, n0, n0 + 1, table->n);
3766     }
3767     eps   = rt - n0;
3768     eps2  = eps * eps;
3769     nnn   = 4 * n0;
3770     Yt    = VFtab[nnn];
3771     Ft    = VFtab[nnn + 1];
3772     Geps  = VFtab[nnn + 2] * eps;
3773     Heps2 = VFtab[nnn + 3] * eps2;
3774     Fp    = Ft + Geps + Heps2;
3775     VV    = Yt + Fp * eps;
3776     FF    = Fp + Geps + 2.0 * Heps2;
3777
3778     *F        = -k * FF * tabscale;
3779     *V        = k * VV;
3780     dvdlambda = (kB - kA) * VV;
3781
3782     return dvdlambda;
3783
3784     /* That was 22 flops */
3785 }
3786
3787 template<BondedKernelFlavor flavor>
3788 real tab_bonds(int             nbonds,
3789                const t_iatom   forceatoms[],
3790                const t_iparams forceparams[],
3791                const rvec      x[],
3792                rvec4           f[],
3793                rvec            fshift[],
3794                const t_pbc*    pbc,
3795                const t_graph*  g,
3796                real            lambda,
3797                real*           dvdlambda,
3798                const t_mdatoms gmx_unused* md,
3799                t_fcdata*                   fcd,
3800                int gmx_unused* global_atom_index)
3801 {
3802     int  i, ki, ai, aj, type, table;
3803     real dr, dr2, fbond, vbond, vtot;
3804     rvec dx;
3805
3806     vtot = 0.0;
3807     for (i = 0; (i < nbonds);)
3808     {
3809         type = forceatoms[i++];
3810         ai   = forceatoms[i++];
3811         aj   = forceatoms[i++];
3812
3813         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3814         dr2 = iprod(dx, dx);                       /*   5               */
3815         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3816
3817         table = forceparams[type].tab.table;
3818
3819         *dvdlambda += bonded_tab("bond", table, &fcd->bondtab[table], forceparams[type].tab.kA,
3820                                  forceparams[type].tab.kB, dr, lambda, &vbond, &fbond); /*  22 */
3821
3822         if (dr2 == 0.0)
3823         {
3824             continue;
3825         }
3826
3827
3828         vtot += vbond;              /* 1*/
3829         fbond *= gmx::invsqrt(dr2); /*   6              */
3830
3831         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3832     }                                                                  /* 62 TOTAL      */
3833     return vtot;
3834 }
3835
3836 template<BondedKernelFlavor flavor>
3837 real tab_angles(int             nbonds,
3838                 const t_iatom   forceatoms[],
3839                 const t_iparams forceparams[],
3840                 const rvec      x[],
3841                 rvec4           f[],
3842                 rvec            fshift[],
3843                 const t_pbc*    pbc,
3844                 const t_graph*  g,
3845                 real            lambda,
3846                 real*           dvdlambda,
3847                 const t_mdatoms gmx_unused* md,
3848                 t_fcdata*                   fcd,
3849                 int gmx_unused* global_atom_index)
3850 {
3851     int  i, ai, aj, ak, t1, t2, type, table;
3852     rvec r_ij, r_kj;
3853     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3854     ivec jt, dt_ij, dt_kj;
3855
3856     vtot = 0.0;
3857     for (i = 0; (i < nbonds);)
3858     {
3859         type = forceatoms[i++];
3860         ai   = forceatoms[i++];
3861         aj   = forceatoms[i++];
3862         ak   = forceatoms[i++];
3863
3864         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3865
3866         table = forceparams[type].tab.table;
3867
3868         *dvdlambda += bonded_tab("angle", table, &fcd->angletab[table], forceparams[type].tab.kA,
3869                                  forceparams[type].tab.kB, theta, lambda, &va, &dVdt); /*  22  */
3870         vtot += va;
3871
3872         cos_theta2 = gmx::square(cos_theta); /*   1             */
3873         if (cos_theta2 < 1)
3874         {
3875             int  m;
3876             real st, sth;
3877             real cik, cii, ckk;
3878             real nrkj2, nrij2;
3879             rvec f_i, f_j, f_k;
3880
3881             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3882             sth   = st * cos_theta;                      /*   1         */
3883             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3884             nrij2 = iprod(r_ij, r_ij);
3885
3886             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3887             cii = sth / nrij2;                      /*  10              */
3888             ckk = sth / nrkj2;                      /*  10              */
3889
3890             for (m = 0; (m < DIM); m++) /*  39          */
3891             {
3892                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3893                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3894                 f_j[m] = -f_i[m] - f_k[m];
3895                 f[ai][m] += f_i[m];
3896                 f[aj][m] += f_j[m];
3897                 f[ak][m] += f_k[m];
3898             }
3899
3900             if (computeVirial(flavor))
3901             {
3902                 if (g)
3903                 {
3904                     copy_ivec(SHIFT_IVEC(g, aj), jt);
3905
3906                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3907                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3908                     t1 = IVEC2IS(dt_ij);
3909                     t2 = IVEC2IS(dt_kj);
3910                 }
3911                 rvec_inc(fshift[t1], f_i);
3912                 rvec_inc(fshift[CENTRAL], f_j);
3913                 rvec_inc(fshift[t2], f_k);
3914             }
3915         } /* 169 TOTAL  */
3916     }
3917     return vtot;
3918 }
3919
3920 template<BondedKernelFlavor flavor>
3921 real tab_dihs(int             nbonds,
3922               const t_iatom   forceatoms[],
3923               const t_iparams forceparams[],
3924               const rvec      x[],
3925               rvec4           f[],
3926               rvec            fshift[],
3927               const t_pbc*    pbc,
3928               const t_graph*  g,
3929               real            lambda,
3930               real*           dvdlambda,
3931               const t_mdatoms gmx_unused* md,
3932               t_fcdata*                   fcd,
3933               int gmx_unused* global_atom_index)
3934 {
3935     int  i, type, ai, aj, ak, al, table;
3936     int  t1, t2, t3;
3937     rvec r_ij, r_kj, r_kl, m, n;
3938     real phi, ddphi, vpd, vtot;
3939
3940     vtot = 0.0;
3941     for (i = 0; (i < nbonds);)
3942     {
3943         type = forceatoms[i++];
3944         ai   = forceatoms[i++];
3945         aj   = forceatoms[i++];
3946         ak   = forceatoms[i++];
3947         al   = forceatoms[i++];
3948
3949         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
3950
3951         table = forceparams[type].tab.table;
3952
3953         /* Hopefully phi+M_PI never results in values < 0 */
3954         *dvdlambda += bonded_tab("dihedral", table, &fcd->dihtab[table], forceparams[type].tab.kA,
3955                                  forceparams[type].tab.kB, phi + M_PI, lambda, &vpd, &ddphi);
3956
3957         vtot += vpd;
3958         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
3959                            t2, t3); /* 112      */
3960
3961     } /* 227 TOTAL  */
3962
3963     return vtot;
3964 }
3965
3966 struct BondedInteractions
3967 {
3968     BondedFunction function;
3969     int            nrnbIndex;
3970 };
3971
3972 /*! \brief Lookup table of bonded interaction functions
3973  *
3974  * This must have as many entries as interaction_function in ifunc.cpp */
3975 template<BondedKernelFlavor flavor>
3976 const std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
3977     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
3978     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
3979     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
3980     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
3981     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
3982     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
3983     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
3984     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
3985     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
3986     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
3987     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
3988     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
3989     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
3990     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
3991     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3992     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3993     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
3994     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
3995     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
3996     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
3997     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
3998     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
3999     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
4000     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
4001     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
4002     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
4003     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
4004     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
4005     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
4006     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
4007     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
4008     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
4009     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
4010     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
4011     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
4012     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
4013     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
4014     BondedInteractions{ unimplemented, -1 },                       // F_LJ
4015     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
4016     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
4017     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
4018     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
4019     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
4020     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
4021     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
4022     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
4023     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
4024     BondedInteractions{ unimplemented, -1 },                       // F_DPD
4025     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
4026     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
4027     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
4028     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
4029     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
4030     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
4031     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
4032     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
4033     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
4034     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
4035     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
4036     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
4037     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
4038     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
4039     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
4040     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
4041     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
4042     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
4043     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
4044     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
4045     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
4046     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
4047     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
4048     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
4049     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
4050     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
4051     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
4052     BondedInteractions{ unimplemented, -1 },                       // F_EQM
4053     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
4054     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
4055     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
4056     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
4057     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
4058     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
4059     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
4060     BondedInteractions{ unimplemented, -1 },                       // F_PRES
4061     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
4062     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
4063     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
4064     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
4065     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
4066     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
4067     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
4068     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
4069 };
4070
4071 /*! \brief List of instantiated BondedInteractions list */
4072 const gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
4073     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
4074     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
4075     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
4076     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
4077 };
4078
4079 //! \endcond
4080
4081 } // namespace
4082
4083 real calculateSimpleBond(const int            ftype,
4084                          const int            numForceatoms,
4085                          const t_iatom        forceatoms[],
4086                          const t_iparams      forceparams[],
4087                          const rvec           x[],
4088                          rvec4                f[],
4089                          rvec                 fshift[],
4090                          const struct t_pbc*  pbc,
4091                          const struct t_graph gmx_unused* g,
4092                          const real                       lambda,
4093                          real*                            dvdlambda,
4094                          const t_mdatoms*                 md,
4095                          t_fcdata*                        fcd,
4096                          int gmx_unused*          global_atom_index,
4097                          const BondedKernelFlavor bondedKernelFlavor)
4098 {
4099     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
4100
4101     real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
4102                              dvdlambda, md, fcd, global_atom_index);
4103
4104     return v;
4105 }
4106
4107 int nrnbIndex(int ftype)
4108 {
4109     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
4110 }