src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines low-level functions necessary for
  40  * computing energies and forces for listed interactions.
  41  *
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  *
  44  * \ingroup module_listed_forces
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "bonded.h"
  49
  50 #include "config.h"
  51
  52 #include <cassert>
  53 #include <cmath>
  54
  55 #include <algorithm>
  56
  57 #include "gromacs/gmxlib/nrnb.h"
  58 #include "gromacs/listed_forces/disre.h"
  59 #include "gromacs/listed_forces/orires.h"
  60 #include "gromacs/math/functions.h"
  61 #include "gromacs/math/units.h"
  62 #include "gromacs/math/utilities.h"
  63 #include "gromacs/math/vec.h"
  64 #include "gromacs/mdtypes/fcdata.h"
  65 #include "gromacs/mdtypes/mdatom.h"
  66 #include "gromacs/pbcutil/ishift.h"
  67 #include "gromacs/pbcutil/mshift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 namespace
  85 {
  86
  87 //! Type of CPU function to compute a bonded interaction.
  88 using BondedFunction = real (*)(int              nbonds,
  89                                 const t_iatom    iatoms[],
  90                                 const t_iparams  iparams[],
  91                                 const rvec       x[],
  92                                 rvec4            f[],
  93                                 rvec             fshift[],
  94                                 const t_pbc*     pbc,
  95                                 const t_graph*   g,
  96                                 real             lambda,
  97                                 real*            dvdlambda,
  98                                 const t_mdatoms* md,
  99                                 t_fcdata*        fcd,
 100                                 int*             ddgatindex);
 101
 102 /*! \brief Mysterious CMAP coefficient matrix */
 103 const int cmap_coeff_matrix[] = {
 104     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 105     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 106     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 107     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 108     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 109     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 110     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 111     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 112     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 113     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 114     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 115 };
 116
 117
 118 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 119  *
 120  * \todo This kind of code appears in many places. Consolidate it */
 121 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 122 {
 123     if (pbc)
 124     {
 125         return pbc_dx_aiuc(pbc, xi, xj, dx);
 126     }
 127     else
 128     {
 129         rvec_sub(xi, xj, dx);
 130         return CENTRAL;
 131     }
 132 }
 133
 134 } // namespace
 135
 136 //! \cond
 137 real bond_angle(const rvec   xi,
 138                 const rvec   xj,
 139                 const rvec   xk,
 140                 const t_pbc* pbc,
 141                 rvec         r_ij,
 142                 rvec         r_kj,
 143                 real*        costh,
 144                 int*         t1,
 145                 int*         t2)
 146 /* Return value is the angle between the bonds i-j and j-k */
 147 {
 148     /* 41 FLOPS */
 149     real th;
 150
 151     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 152     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 153
 154     *costh = cos_angle(r_ij, r_kj); /* 25               */
 155     th     = std::acos(*costh);     /* 10               */
 156     /* 41 TOTAL */
 157     return th;
 158 }
 159
 160 real dih_angle(const rvec   xi,
 161                const rvec   xj,
 162                const rvec   xk,
 163                const rvec   xl,
 164                const t_pbc* pbc,
 165                rvec         r_ij,
 166                rvec         r_kj,
 167                rvec         r_kl,
 168                rvec         m,
 169                rvec         n,
 170                int*         t1,
 171                int*         t2,
 172                int*         t3)
 173 {
 174     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 175     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 176     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 177
 178     cprod(r_ij, r_kj, m);        /*  9        */
 179     cprod(r_kj, r_kl, n);        /*  9          */
 180     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 181     real ipr  = iprod(r_ij, n);  /*  5        */
 182     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 183     phi       = sign * phi; /*  1               */
 184     /* 82 TOTAL */
 185     return phi;
 186 }
 187 //! \endcond
 188
 189 void make_dp_periodic(real* dp) /* 1 flop? */
 190 {
 191     /* dp cannot be outside (-pi,pi) */
 192     if (*dp >= M_PI)
 193     {
 194         *dp -= 2 * M_PI;
 195     }
 196     else if (*dp < -M_PI)
 197     {
 198         *dp += 2 * M_PI;
 199     }
 200 }
 201
 202 namespace
 203 {
 204
 205 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 206  *
 207  * When \p g==nullptr, \p shiftIndex is used as the periodic shift.
 208  * When \p g!=nullptr, the graph is used to compute the periodic shift.
 209  */
 210 template<BondedKernelFlavor flavor>
 211 inline void spreadBondForces(const real     bondForce,
 212                              const rvec     dx,
 213                              const int      ai,
 214                              const int      aj,
 215                              rvec4*         f,
 216                              int            shiftIndex,
 217                              const t_graph* g,
 218                              rvec*          fshift)
 219 {
 220     if (computeVirial(flavor) && g)
 221     {
 222         ivec dt;
 223         ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 224         shiftIndex = IVEC2IS(dt);
 225     }
 226
 227     for (int m = 0; m < DIM; m++) /*  15          */
 228     {
 229         const real fij = bondForce * dx[m];
 230         f[ai][m] += fij;
 231         f[aj][m] -= fij;
 232         if (computeVirial(flavor))
 233         {
 234             fshift[shiftIndex][m] += fij;
 235             fshift[CENTRAL][m] -= fij;
 236         }
 237     }
 238 }
 239
 240 /*! \brief Morse potential bond
 241  *
 242  * By Frank Everdij. Three parameters needed:
 243  *
 244  * b0 = equilibrium distance in nm
 245  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 246  * cb = well depth in kJ/mol
 247  *
 248  * Note: the potential is referenced to be +cb at infinite separation
 249  *       and zero at the equilibrium distance!
 250  */
 251 template<BondedKernelFlavor flavor>
 252 real morse_bonds(int             nbonds,
 253                  const t_iatom   forceatoms[],
 254                  const t_iparams forceparams[],
 255                  const rvec      x[],
 256                  rvec4           f[],
 257                  rvec            fshift[],
 258                  const t_pbc*    pbc,
 259                  const t_graph*  g,
 260                  real            lambda,
 261                  real*           dvdlambda,
 262                  const t_mdatoms gmx_unused* md,
 263                  t_fcdata gmx_unused* fcd,
 264                  int gmx_unused* global_atom_index)
 265 {
 266     const real one = 1.0;
 267     const real two = 2.0;
 268     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 269     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 270     rvec       dx;
 271     int        i, ki, type, ai, aj;
 272
 273     vtot = 0.0;
 274     for (i = 0; (i < nbonds);)
 275     {
 276         type = forceatoms[i++];
 277         ai   = forceatoms[i++];
 278         aj   = forceatoms[i++];
 279
 280         b0A = forceparams[type].morse.b0A;
 281         beA = forceparams[type].morse.betaA;
 282         cbA = forceparams[type].morse.cbA;
 283
 284         b0B = forceparams[type].morse.b0B;
 285         beB = forceparams[type].morse.betaB;
 286         cbB = forceparams[type].morse.cbB;
 287
 288         L1 = one - lambda;            /* 1 */
 289         b0 = L1 * b0A + lambda * b0B; /* 3 */
 290         be = L1 * beA + lambda * beB; /* 3 */
 291         cb = L1 * cbA + lambda * cbB; /* 3 */
 292
 293         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 294         dr2  = iprod(dx, dx);                       /*   5          */
 295         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 296         temp = std::exp(-be * (dr - b0));           /*  12          */
 297
 298         if (temp == one)
 299         {
 300             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 301             *dvdlambda += cbB - cbA;
 302             continue;
 303         }
 304
 305         omtemp   = one - temp;                                      /*   1          */
 306         cbomtemp = cb * omtemp;                                     /*   1          */
 307         vbond    = cbomtemp * omtemp;                               /*   1          */
 308         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 309         vtot += vbond;                                              /*   1          */
 310
 311         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 312                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 313
 314         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 315     }                                                                  /*  83 TOTAL    */
 316     return vtot;
 317 }
 318
 319 //! \cond
 320 template<BondedKernelFlavor flavor>
 321 real cubic_bonds(int             nbonds,
 322                  const t_iatom   forceatoms[],
 323                  const t_iparams forceparams[],
 324                  const rvec      x[],
 325                  rvec4           f[],
 326                  rvec            fshift[],
 327                  const t_pbc*    pbc,
 328                  const t_graph*  g,
 329                  real gmx_unused lambda,
 330                  real gmx_unused* dvdlambda,
 331                  const t_mdatoms gmx_unused* md,
 332                  t_fcdata gmx_unused* fcd,
 333                  int gmx_unused* global_atom_index)
 334 {
 335     const real three = 3.0;
 336     const real two   = 2.0;
 337     real       kb, b0, kcub;
 338     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 339     rvec       dx;
 340     int        i, ki, type, ai, aj;
 341
 342     vtot = 0.0;
 343     for (i = 0; (i < nbonds);)
 344     {
 345         type = forceatoms[i++];
 346         ai   = forceatoms[i++];
 347         aj   = forceatoms[i++];
 348
 349         b0   = forceparams[type].cubic.b0;
 350         kb   = forceparams[type].cubic.kb;
 351         kcub = forceparams[type].cubic.kcub;
 352
 353         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 354         dr2 = iprod(dx, dx);                       /*   5          */
 355
 356         if (dr2 == 0.0)
 357         {
 358             continue;
 359         }
 360
 361         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 362         dist   = dr - b0;
 363         kdist  = kb * dist;
 364         kdist2 = kdist * dist;
 365
 366         vbond = kdist2 + kcub * kdist2 * dist;
 367         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 368
 369         vtot += vbond; /* 21 */
 370
 371         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 372     }                                                                  /*  54 TOTAL    */
 373     return vtot;
 374 }
 375
 376 template<BondedKernelFlavor flavor>
 377 real FENE_bonds(int             nbonds,
 378                 const t_iatom   forceatoms[],
 379                 const t_iparams forceparams[],
 380                 const rvec      x[],
 381                 rvec4           f[],
 382                 rvec            fshift[],
 383                 const t_pbc*    pbc,
 384                 const t_graph*  g,
 385                 real gmx_unused lambda,
 386                 real gmx_unused* dvdlambda,
 387                 const t_mdatoms gmx_unused* md,
 388                 t_fcdata gmx_unused* fcd,
 389                 int*                 global_atom_index)
 390 {
 391     const real half = 0.5;
 392     const real one  = 1.0;
 393     real       bm, kb;
 394     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 395     rvec       dx;
 396     int        i, ki, type, ai, aj;
 397
 398     vtot = 0.0;
 399     for (i = 0; (i < nbonds);)
 400     {
 401         type = forceatoms[i++];
 402         ai   = forceatoms[i++];
 403         aj   = forceatoms[i++];
 404
 405         bm = forceparams[type].fene.bm;
 406         kb = forceparams[type].fene.kb;
 407
 408         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 409         dr2 = iprod(dx, dx);                       /*   5          */
 410
 411         if (dr2 == 0.0)
 412         {
 413             continue;
 414         }
 415
 416         bm2 = bm * bm;
 417
 418         if (dr2 >= bm2)
 419         {
 420             gmx_fatal(FARGS, "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d", dr2, bm2,
 421                       glatnr(global_atom_index, ai), glatnr(global_atom_index, aj));
 422         }
 423
 424         omdr2obm2 = one - dr2 / bm2;
 425
 426         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 427         fbond = -kb / omdr2obm2;
 428
 429         vtot += vbond; /* 35 */
 430
 431         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 432     }                                                                  /*  58 TOTAL    */
 433     return vtot;
 434 }
 435
 436 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 437 {
 438     const real half = 0.5;
 439     real       L1, kk, x0, dx, dx2;
 440     real       v, f, dvdlambda;
 441
 442     L1 = 1.0 - lambda;
 443     kk = L1 * kA + lambda * kB;
 444     x0 = L1 * xA + lambda * xB;
 445
 446     dx  = x - x0;
 447     dx2 = dx * dx;
 448
 449     f         = -kk * dx;
 450     v         = half * kk * dx2;
 451     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 452
 453     *F = f;
 454     *V = v;
 455
 456     return dvdlambda;
 457
 458     /* That was 19 flops */
 459 }
 460
 461
 462 template<BondedKernelFlavor flavor>
 463 real bonds(int             nbonds,
 464            const t_iatom   forceatoms[],
 465            const t_iparams forceparams[],
 466            const rvec      x[],
 467            rvec4           f[],
 468            rvec            fshift[],
 469            const t_pbc*    pbc,
 470            const t_graph*  g,
 471            real            lambda,
 472            real*           dvdlambda,
 473            const t_mdatoms gmx_unused* md,
 474            t_fcdata gmx_unused* fcd,
 475            int gmx_unused* global_atom_index)
 476 {
 477     int  i, ki, ai, aj, type;
 478     real dr, dr2, fbond, vbond, vtot;
 479     rvec dx;
 480
 481     vtot = 0.0;
 482     for (i = 0; (i < nbonds);)
 483     {
 484         type = forceatoms[i++];
 485         ai   = forceatoms[i++];
 486         aj   = forceatoms[i++];
 487
 488         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 489         dr2 = iprod(dx, dx);                       /*   5               */
 490         dr  = std::sqrt(dr2);                      /*  10               */
 491
 492         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 493                                forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr,
 494                                lambda, &vbond, &fbond); /*  19  */
 495
 496         if (dr2 == 0.0)
 497         {
 498             continue;
 499         }
 500
 501
 502         vtot += vbond;              /* 1*/
 503         fbond *= gmx::invsqrt(dr2); /*   6              */
 504
 505         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 506     }                                                                  /* 59 TOTAL      */
 507     return vtot;
 508 }
 509
 510 template<BondedKernelFlavor flavor>
 511 real restraint_bonds(int             nbonds,
 512                      const t_iatom   forceatoms[],
 513                      const t_iparams forceparams[],
 514                      const rvec      x[],
 515                      rvec4           f[],
 516                      rvec            fshift[],
 517                      const t_pbc*    pbc,
 518                      const t_graph*  g,
 519                      real            lambda,
 520                      real*           dvdlambda,
 521                      const t_mdatoms gmx_unused* md,
 522                      t_fcdata gmx_unused* fcd,
 523                      int gmx_unused* global_atom_index)
 524 {
 525     int  i, ki, ai, aj, type;
 526     real dr, dr2, fbond, vbond, vtot;
 527     real L1;
 528     real low, dlow, up1, dup1, up2, dup2, k, dk;
 529     real drh, drh2;
 530     rvec dx;
 531
 532     L1 = 1.0 - lambda;
 533
 534     vtot = 0.0;
 535     for (i = 0; (i < nbonds);)
 536     {
 537         type = forceatoms[i++];
 538         ai   = forceatoms[i++];
 539         aj   = forceatoms[i++];
 540
 541         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 542         dr2 = iprod(dx, dx);                       /*   5               */
 543         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 544
 545         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 546         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 547         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 548         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 549         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 550         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 551         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 552         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 553         /* 24 */
 554
 555         if (dr < low)
 556         {
 557             drh   = dr - low;
 558             drh2  = drh * drh;
 559             vbond = 0.5 * k * drh2;
 560             fbond = -k * drh;
 561             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 562         } /* 11 */
 563         else if (dr <= up1)
 564         {
 565             vbond = 0;
 566             fbond = 0;
 567         }
 568         else if (dr <= up2)
 569         {
 570             drh   = dr - up1;
 571             drh2  = drh * drh;
 572             vbond = 0.5 * k * drh2;
 573             fbond = -k * drh;
 574             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 575         } /* 11 */
 576         else
 577         {
 578             drh   = dr - up2;
 579             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 580             fbond = -k * (up2 - up1);
 581             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 582                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 583         }
 584
 585         if (dr2 == 0.0)
 586         {
 587             continue;
 588         }
 589
 590         vtot += vbond;              /* 1*/
 591         fbond *= gmx::invsqrt(dr2); /*   6              */
 592
 593         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 594     }                                                                  /* 59 TOTAL      */
 595
 596     return vtot;
 597 }
 598
 599 template<BondedKernelFlavor flavor>
 600 real polarize(int              nbonds,
 601               const t_iatom    forceatoms[],
 602               const t_iparams  forceparams[],
 603               const rvec       x[],
 604               rvec4            f[],
 605               rvec             fshift[],
 606               const t_pbc*     pbc,
 607               const t_graph*   g,
 608               real             lambda,
 609               real*            dvdlambda,
 610               const t_mdatoms* md,
 611               t_fcdata gmx_unused* fcd,
 612               int gmx_unused* global_atom_index)
 613 {
 614     int  i, ki, ai, aj, type;
 615     real dr, dr2, fbond, vbond, vtot, ksh;
 616     rvec dx;
 617
 618     vtot = 0.0;
 619     for (i = 0; (i < nbonds);)
 620     {
 621         type = forceatoms[i++];
 622         ai   = forceatoms[i++];
 623         aj   = forceatoms[i++];
 624         ksh  = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].polarize.alpha;
 625
 626         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 627         dr2 = iprod(dx, dx);                       /*   5               */
 628         dr  = std::sqrt(dr2);                      /*  10               */
 629
 630         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 631
 632         if (dr2 == 0.0)
 633         {
 634             continue;
 635         }
 636
 637         vtot += vbond;              /* 1*/
 638         fbond *= gmx::invsqrt(dr2); /*   6              */
 639
 640         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 641     }                                                                  /* 59 TOTAL      */
 642     return vtot;
 643 }
 644
 645 template<BondedKernelFlavor flavor>
 646 real anharm_polarize(int              nbonds,
 647                      const t_iatom    forceatoms[],
 648                      const t_iparams  forceparams[],
 649                      const rvec       x[],
 650                      rvec4            f[],
 651                      rvec             fshift[],
 652                      const t_pbc*     pbc,
 653                      const t_graph*   g,
 654                      real             lambda,
 655                      real*            dvdlambda,
 656                      const t_mdatoms* md,
 657                      t_fcdata gmx_unused* fcd,
 658                      int gmx_unused* global_atom_index)
 659 {
 660     int  i, ki, ai, aj, type;
 661     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 662     rvec dx;
 663
 664     vtot = 0.0;
 665     for (i = 0; (i < nbonds);)
 666     {
 667         type = forceatoms[i++];
 668         ai   = forceatoms[i++];
 669         aj   = forceatoms[i++];
 670         ksh = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 671         khyp  = forceparams[type].anharm_polarize.khyp;
 672         drcut = forceparams[type].anharm_polarize.drcut;
 673
 674         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 675         dr2 = iprod(dx, dx);                       /*   5               */
 676         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 677
 678         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 679
 680         if (dr2 == 0.0)
 681         {
 682             continue;
 683         }
 684
 685         if (dr > drcut)
 686         {
 687             ddr  = dr - drcut;
 688             ddr3 = ddr * ddr * ddr;
 689             vbond += khyp * ddr * ddr3;
 690             fbond -= 4 * khyp * ddr3;
 691         }
 692         fbond *= gmx::invsqrt(dr2); /*   6              */
 693         vtot += vbond;              /* 1*/
 694
 695         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 696     }                                                                  /* 72 TOTAL      */
 697     return vtot;
 698 }
 699
 700 template<BondedKernelFlavor flavor>
 701 real water_pol(int             nbonds,
 702                const t_iatom   forceatoms[],
 703                const t_iparams forceparams[],
 704                const rvec      x[],
 705                rvec4           f[],
 706                rvec gmx_unused fshift[],
 707                const t_pbc gmx_unused* pbc,
 708                const t_graph gmx_unused* g,
 709                real gmx_unused lambda,
 710                real gmx_unused* dvdlambda,
 711                const t_mdatoms gmx_unused* md,
 712                t_fcdata gmx_unused* fcd,
 713                int gmx_unused* global_atom_index)
 714 {
 715     /* This routine implements anisotropic polarizibility for water, through
 716      * a shell connected to a dummy with spring constant that differ in the
 717      * three spatial dimensions in the molecular frame.
 718      */
 719     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 720     ivec dt;
 721     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 722     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 723
 724     vtot = 0.0;
 725     if (nbonds > 0)
 726     {
 727         type0  = forceatoms[0];
 728         aS     = forceatoms[5];
 729         qS     = md->chargeA[aS];
 730         kk[XX] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_x;
 731         kk[YY] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_y;
 732         kk[ZZ] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_z;
 733         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 734         for (i = 0; (i < nbonds); i += 6)
 735         {
 736             type = forceatoms[i];
 737             if (type != type0)
 738             {
 739                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0,
 740                           __FILE__, __LINE__);
 741             }
 742             aO  = forceatoms[i + 1];
 743             aH1 = forceatoms[i + 2];
 744             aH2 = forceatoms[i + 3];
 745             aD  = forceatoms[i + 4];
 746             aS  = forceatoms[i + 5];
 747
 748             /* Compute vectors describing the water frame */
 749             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 750             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 751             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 752             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 753             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 754             cprod(dOH1, dOH2, nW);
 755
 756             /* Compute inverse length of normal vector
 757              * (this one could be precomputed, but I'm too lazy now)
 758              */
 759             r_nW = gmx::invsqrt(iprod(nW, nW));
 760             /* This is for precision, but does not make a big difference,
 761              * it can go later.
 762              */
 763             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 764
 765             /* Normalize the vectors in the water frame */
 766             svmul(r_nW, nW, nW);
 767             svmul(r_HH, dHH, dHH);
 768             svmul(r_OD, dOD, dOD);
 769
 770             /* Compute displacement of shell along components of the vector */
 771             dx[ZZ] = iprod(dDS, dOD);
 772             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 773             for (m = 0; (m < DIM); m++)
 774             {
 775                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 776             }
 777
 778             /*dx[XX] = iprod(dDS,nW);
 779                dx[YY] = iprod(dDS,dHH);*/
 780             dx[XX] = iprod(proj, nW);
 781             for (m = 0; (m < DIM); m++)
 782             {
 783                 proj[m] -= dx[XX] * nW[m];
 784             }
 785             dx[YY] = iprod(proj, dHH);
 786             /* Now compute the forces and energy */
 787             kdx[XX] = kk[XX] * dx[XX];
 788             kdx[YY] = kk[YY] * dx[YY];
 789             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 790             vtot += iprod(dx, kdx);
 791
 792             if (computeVirial(flavor) && g)
 793             {
 794                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 795                 ki = IVEC2IS(dt);
 796             }
 797
 798             for (m = 0; (m < DIM); m++)
 799             {
 800                 /* This is a tensor operation but written out for speed */
 801                 tx  = nW[m] * kdx[XX];
 802                 ty  = dHH[m] * kdx[YY];
 803                 tz  = dOD[m] * kdx[ZZ];
 804                 fij = -tx - ty - tz;
 805                 f[aS][m] += fij;
 806                 f[aD][m] -= fij;
 807                 if (computeVirial(flavor))
 808                 {
 809                     fshift[ki][m] += fij;
 810                     fshift[CENTRAL][m] -= fij;
 811                 }
 812             }
 813         }
 814     }
 815     return 0.5 * vtot;
 816 }
 817
 818 template<BondedKernelFlavor flavor>
 819 static real
 820 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 821 {
 822     rvec r12;
 823     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 824     int  m, t;
 825
 826     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 827
 828     r12sq  = iprod(r12, r12);                                                     /*  5 */
 829     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 830     r12bar = afac / r12_1;                                                        /*  5 */
 831     v0     = qq * ONE_4PI_EPS0 * r12_1;                                           /*  2 */
 832     ebar   = std::exp(-r12bar);                                                   /*  5 */
 833     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 834     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 835
 836     for (m = 0; (m < DIM); m++)
 837     {
 838         fff = fscal * r12[m];
 839         fi[m] += fff;
 840         fj[m] -= fff;
 841         if (computeVirial(flavor))
 842         {
 843             fshift[t][m] += fff;
 844             fshift[CENTRAL][m] -= fff;
 845         }
 846     } /* 15 */
 847
 848     return v0 * v1; /* 1 */
 849     /* 54 */
 850 }
 851
 852 template<BondedKernelFlavor flavor>
 853 real thole_pol(int             nbonds,
 854                const t_iatom   forceatoms[],
 855                const t_iparams forceparams[],
 856                const rvec      x[],
 857                rvec4           f[],
 858                rvec            fshift[],
 859                const t_pbc*    pbc,
 860                const t_graph gmx_unused* g,
 861                real gmx_unused lambda,
 862                real gmx_unused* dvdlambda,
 863                const t_mdatoms* md,
 864                t_fcdata gmx_unused* fcd,
 865                int gmx_unused* global_atom_index)
 866 {
 867     /* Interaction between two pairs of particles with opposite charge */
 868     int  i, type, a1, da1, a2, da2;
 869     real q1, q2, qq, a, al1, al2, afac;
 870     real V = 0;
 871
 872     for (i = 0; (i < nbonds);)
 873     {
 874         type = forceatoms[i++];
 875         a1   = forceatoms[i++];
 876         da1  = forceatoms[i++];
 877         a2   = forceatoms[i++];
 878         da2  = forceatoms[i++];
 879         q1   = md->chargeA[da1];
 880         q2   = md->chargeA[da2];
 881         a    = forceparams[type].thole.a;
 882         al1  = forceparams[type].thole.alpha1;
 883         al2  = forceparams[type].thole.alpha2;
 884         qq   = q1 * q2;
 885         afac = a * gmx::invsixthroot(al1 * al2);
 886         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 887         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 888         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 889         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 890     }
 891     /* 290 flops */
 892     return V;
 893 }
 894
 895 // Avoid gcc 386 -O3 code generation bug in this function (see Redmine
 896 // #3205 for more information)
 897 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
 898 #    pragma GCC push_options
 899 #    pragma GCC optimize("O2")
 900 #    define avoid_gcc_i386_o3_code_generation_bug
 901 #endif
 902
 903 template<BondedKernelFlavor flavor>
 904 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 905 angles(int             nbonds,
 906        const t_iatom   forceatoms[],
 907        const t_iparams forceparams[],
 908        const rvec      x[],
 909        rvec4           f[],
 910        rvec            fshift[],
 911        const t_pbc*    pbc,
 912        const t_graph*  g,
 913        real            lambda,
 914        real*           dvdlambda,
 915        const t_mdatoms gmx_unused* md,
 916        t_fcdata gmx_unused* fcd,
 917        int gmx_unused* global_atom_index)
 918 {
 919     int  i, ai, aj, ak, t1, t2, type;
 920     rvec r_ij, r_kj;
 921     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 922     ivec jt, dt_ij, dt_kj;
 923
 924     vtot = 0.0;
 925     for (i = 0; i < nbonds;)
 926     {
 927         type = forceatoms[i++];
 928         ai   = forceatoms[i++];
 929         aj   = forceatoms[i++];
 930         ak   = forceatoms[i++];
 931
 932         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
 933
 934         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 935                                forceparams[type].harmonic.rA * DEG2RAD,
 936                                forceparams[type].harmonic.rB * DEG2RAD, theta, lambda, &va, &dVdt); /*  21  */
 937         vtot += va;
 938
 939         cos_theta2 = gmx::square(cos_theta);
 940         if (cos_theta2 < 1)
 941         {
 942             int  m;
 943             real st, sth;
 944             real cik, cii, ckk;
 945             real nrkj2, nrij2;
 946             real nrkj_1, nrij_1;
 947             rvec f_i, f_j, f_k;
 948
 949             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
 950             sth   = st * cos_theta;                      /*   1         */
 951             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
 952             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
 953
 954             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
 955             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
 956
 957             cik = st * nrij_1 * nrkj_1;  /*   2         */
 958             cii = sth * nrij_1 * nrij_1; /*   2         */
 959             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
 960
 961             for (m = 0; m < DIM; m++)
 962             { /*  39            */
 963                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
 964                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
 965                 f_j[m] = -f_i[m] - f_k[m];
 966                 f[ai][m] += f_i[m];
 967                 f[aj][m] += f_j[m];
 968                 f[ak][m] += f_k[m];
 969             }
 970             if (computeVirial(flavor))
 971             {
 972                 if (g != nullptr)
 973                 {
 974                     copy_ivec(SHIFT_IVEC(g, aj), jt);
 975
 976                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
 977                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
 978                     t1 = IVEC2IS(dt_ij);
 979                     t2 = IVEC2IS(dt_kj);
 980                 }
 981                 rvec_inc(fshift[t1], f_i);
 982                 rvec_inc(fshift[CENTRAL], f_j);
 983                 rvec_inc(fshift[t2], f_k);
 984             }
 985         } /* 161 TOTAL  */
 986     }
 987
 988     return vtot;
 989 }
 990
 991 #ifdef avoid_gcc_i386_o3_code_generation_bug
 992 #    pragma GCC pop_options
 993 #    undef avoid_gcc_i386_o3_code_generation_bug
 994 #endif
 995
 996 #if GMX_SIMD_HAVE_REAL
 997
 998 /* As angles, but using SIMD to calculate many angles at once.
 999  * This routines does not calculate energies and shift forces.
1000  */
1001 template<BondedKernelFlavor flavor>
1002 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1003 angles(int             nbonds,
1004        const t_iatom   forceatoms[],
1005        const t_iparams forceparams[],
1006        const rvec      x[],
1007        rvec4           f[],
1008        rvec gmx_unused fshift[],
1009        const t_pbc*    pbc,
1010        const t_graph gmx_unused* g,
1011        real gmx_unused lambda,
1012        real gmx_unused* dvdlambda,
1013        const t_mdatoms gmx_unused* md,
1014        t_fcdata gmx_unused* fcd,
1015        int gmx_unused* global_atom_index)
1016 {
1017     const int                                nfa1 = 4;
1018     int                                      i, iu, s;
1019     int                                      type;
1020     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1021     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1022     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1023     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
1024     SimdReal                                 deg2rad_S(DEG2RAD);
1025     SimdReal                                 xi_S, yi_S, zi_S;
1026     SimdReal                                 xj_S, yj_S, zj_S;
1027     SimdReal                                 xk_S, yk_S, zk_S;
1028     SimdReal                                 k_S, theta0_S;
1029     SimdReal                                 rijx_S, rijy_S, rijz_S;
1030     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
1031     SimdReal                                 one_S(1.0);
1032     SimdReal min_one_plus_eps_S(-1.0 + 2.0 * GMX_REAL_EPS); // Smallest number > -1
1033
1034     SimdReal                         rij_rkj_S;
1035     SimdReal                         nrij2_S, nrij_1_S;
1036     SimdReal                         nrkj2_S, nrkj_1_S;
1037     SimdReal                         cos_S, invsin_S;
1038     SimdReal                         theta_S;
1039     SimdReal                         st_S, sth_S;
1040     SimdReal                         cik_S, cii_S, ckk_S;
1041     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1042     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1043     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1044
1045     set_pbc_simd(pbc, pbc_simd);
1046
1047     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1048     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1049     {
1050         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1051          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1052          */
1053         iu = i;
1054         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1055         {
1056             type  = forceatoms[iu];
1057             ai[s] = forceatoms[iu + 1];
1058             aj[s] = forceatoms[iu + 2];
1059             ak[s] = forceatoms[iu + 3];
1060
1061             /* At the end fill the arrays with the last atoms and 0 params */
1062             if (i + s * nfa1 < nbonds)
1063             {
1064                 coeff[s]                       = forceparams[type].harmonic.krA;
1065                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1066
1067                 if (iu + nfa1 < nbonds)
1068                 {
1069                     iu += nfa1;
1070                 }
1071             }
1072             else
1073             {
1074                 coeff[s]                       = 0;
1075                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1076             }
1077         }
1078
1079         /* Store the non PBC corrected distances packed and aligned */
1080         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1081         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1082         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1083         rijx_S = xi_S - xj_S;
1084         rijy_S = yi_S - yj_S;
1085         rijz_S = zi_S - zj_S;
1086         rkjx_S = xk_S - xj_S;
1087         rkjy_S = yk_S - yj_S;
1088         rkjz_S = zk_S - zj_S;
1089
1090         k_S      = load<SimdReal>(coeff);
1091         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1092
1093         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1094         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1095
1096         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1097
1098         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1099         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1100
1101         nrij_1_S = invsqrt(nrij2_S);
1102         nrkj_1_S = invsqrt(nrkj2_S);
1103
1104         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1105
1106         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1107          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1108          * This also ensures that rounding errors would cause the argument
1109          * of simdAcos to be < -1.
1110          * Note that we do not take precautions for cos(0)=1, so the outer
1111          * atoms in an angle should not be on top of each other.
1112          */
1113         cos_S = max(cos_S, min_one_plus_eps_S);
1114
1115         theta_S = acos(cos_S);
1116
1117         invsin_S = invsqrt(one_S - cos_S * cos_S);
1118
1119         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1120         sth_S = st_S * cos_S;
1121
1122         cik_S = st_S * nrij_1_S * nrkj_1_S;
1123         cii_S = sth_S * nrij_1_S * nrij_1_S;
1124         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1125
1126         f_ix_S = cii_S * rijx_S;
1127         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1128         f_iy_S = cii_S * rijy_S;
1129         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1130         f_iz_S = cii_S * rijz_S;
1131         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1132         f_kx_S = ckk_S * rkjx_S;
1133         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1134         f_ky_S = ckk_S * rkjy_S;
1135         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1136         f_kz_S = ckk_S * rkjz_S;
1137         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1138
1139         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1140         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1141                                  f_iz_S + f_kz_S);
1142         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1143     }
1144
1145     return 0;
1146 }
1147
1148 #endif // GMX_SIMD_HAVE_REAL
1149
1150 template<BondedKernelFlavor flavor>
1151 real linear_angles(int             nbonds,
1152                    const t_iatom   forceatoms[],
1153                    const t_iparams forceparams[],
1154                    const rvec      x[],
1155                    rvec4           f[],
1156                    rvec            fshift[],
1157                    const t_pbc*    pbc,
1158                    const t_graph*  g,
1159                    real            lambda,
1160                    real*           dvdlambda,
1161                    const t_mdatoms gmx_unused* md,
1162                    t_fcdata gmx_unused* fcd,
1163                    int gmx_unused* global_atom_index)
1164 {
1165     int  i, m, ai, aj, ak, t1, t2, type;
1166     rvec f_i, f_j, f_k;
1167     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1168     ivec jt, dt_ij, dt_kj;
1169     rvec r_ij, r_kj, r_ik, dx;
1170
1171     L1   = 1 - lambda;
1172     vtot = 0.0;
1173     for (i = 0; (i < nbonds);)
1174     {
1175         type = forceatoms[i++];
1176         ai   = forceatoms[i++];
1177         aj   = forceatoms[i++];
1178         ak   = forceatoms[i++];
1179
1180         kA   = forceparams[type].linangle.klinA;
1181         kB   = forceparams[type].linangle.klinB;
1182         klin = L1 * kA + lambda * kB;
1183
1184         aA = forceparams[type].linangle.aA;
1185         aB = forceparams[type].linangle.aB;
1186         a  = L1 * aA + lambda * aB;
1187         b  = 1 - a;
1188
1189         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1190         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1191         rvec_sub(r_ij, r_kj, r_ik);
1192
1193         dr2 = 0;
1194         for (m = 0; (m < DIM); m++)
1195         {
1196             dr = -a * r_ij[m] - b * r_kj[m];
1197             dr2 += dr * dr;
1198             dx[m]  = dr;
1199             f_i[m] = a * klin * dr;
1200             f_k[m] = b * klin * dr;
1201             f_j[m] = -(f_i[m] + f_k[m]);
1202             f[ai][m] += f_i[m];
1203             f[aj][m] += f_j[m];
1204             f[ak][m] += f_k[m];
1205         }
1206         va = 0.5 * klin * dr2;
1207         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1208
1209         vtot += va;
1210
1211         if (computeVirial(flavor))
1212         {
1213             if (g)
1214             {
1215                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1216
1217                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1218                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1219                 t1 = IVEC2IS(dt_ij);
1220                 t2 = IVEC2IS(dt_kj);
1221             }
1222             rvec_inc(fshift[t1], f_i);
1223             rvec_inc(fshift[CENTRAL], f_j);
1224             rvec_inc(fshift[t2], f_k);
1225         }
1226     } /* 57 TOTAL       */
1227     return vtot;
1228 }
1229
1230 template<BondedKernelFlavor flavor>
1231 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1232 urey_bradley(int             nbonds,
1233              const t_iatom   forceatoms[],
1234              const t_iparams forceparams[],
1235              const rvec      x[],
1236              rvec4           f[],
1237              rvec            fshift[],
1238              const t_pbc*    pbc,
1239              const t_graph*  g,
1240              real            lambda,
1241              real*           dvdlambda,
1242              const t_mdatoms gmx_unused* md,
1243              t_fcdata gmx_unused* fcd,
1244              int gmx_unused* global_atom_index)
1245 {
1246     int  i, m, ai, aj, ak, t1, t2, type, ki;
1247     rvec r_ij, r_kj, r_ik;
1248     real cos_theta, cos_theta2, theta;
1249     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1250     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1251     ivec jt, dt_ij, dt_kj, dt_ik;
1252
1253     vtot = 0.0;
1254     for (i = 0; (i < nbonds);)
1255     {
1256         type = forceatoms[i++];
1257         ai   = forceatoms[i++];
1258         aj   = forceatoms[i++];
1259         ak   = forceatoms[i++];
1260         th0A = forceparams[type].u_b.thetaA * DEG2RAD;
1261         kthA = forceparams[type].u_b.kthetaA;
1262         r13A = forceparams[type].u_b.r13A;
1263         kUBA = forceparams[type].u_b.kUBA;
1264         th0B = forceparams[type].u_b.thetaB * DEG2RAD;
1265         kthB = forceparams[type].u_b.kthetaB;
1266         r13B = forceparams[type].u_b.r13B;
1267         kUBB = forceparams[type].u_b.kUBB;
1268
1269         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1270
1271         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1272         vtot += va;
1273
1274         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1275         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1276         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1277
1278         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1279
1280         cos_theta2 = gmx::square(cos_theta); /*   1             */
1281         if (cos_theta2 < 1)
1282         {
1283             real st, sth;
1284             real cik, cii, ckk;
1285             real nrkj2, nrij2;
1286             rvec f_i, f_j, f_k;
1287
1288             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1289             sth   = st * cos_theta;                      /*   1         */
1290             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1291             nrij2 = iprod(r_ij, r_ij);
1292
1293             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1294             cii = sth / nrij2;                      /*  10              */
1295             ckk = sth / nrkj2;                      /*  10              */
1296
1297             for (m = 0; (m < DIM); m++) /*  39          */
1298             {
1299                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1300                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1301                 f_j[m] = -f_i[m] - f_k[m];
1302                 f[ai][m] += f_i[m];
1303                 f[aj][m] += f_j[m];
1304                 f[ak][m] += f_k[m];
1305             }
1306             if (computeVirial(flavor))
1307             {
1308                 if (g)
1309                 {
1310                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1311
1312                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1313                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1314                     t1 = IVEC2IS(dt_ij);
1315                     t2 = IVEC2IS(dt_kj);
1316                 }
1317                 rvec_inc(fshift[t1], f_i);
1318                 rvec_inc(fshift[CENTRAL], f_j);
1319                 rvec_inc(fshift[t2], f_k);
1320             }
1321         } /* 161 TOTAL  */
1322         /* Time for the bond calculations */
1323         if (dr2 == 0.0)
1324         {
1325             continue;
1326         }
1327
1328         vtot += vbond;              /* 1*/
1329         fbond *= gmx::invsqrt(dr2); /*   6              */
1330
1331         if (computeVirial(flavor) && g)
1332         {
1333             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1334             ki = IVEC2IS(dt_ik);
1335         }
1336         for (m = 0; (m < DIM); m++) /*  15              */
1337         {
1338             fik = fbond * r_ik[m];
1339             f[ai][m] += fik;
1340             f[ak][m] -= fik;
1341             if (computeVirial(flavor))
1342             {
1343                 fshift[ki][m] += fik;
1344                 fshift[CENTRAL][m] -= fik;
1345             }
1346         }
1347     }
1348     return vtot;
1349 }
1350
1351 #if GMX_SIMD_HAVE_REAL
1352
1353 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1354  * This routines does not calculate energies and shift forces.
1355  */
1356 template<BondedKernelFlavor flavor>
1357 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1358 urey_bradley(int             nbonds,
1359              const t_iatom   forceatoms[],
1360              const t_iparams forceparams[],
1361              const rvec      x[],
1362              rvec4           f[],
1363              rvec gmx_unused fshift[],
1364              const t_pbc*    pbc,
1365              const t_graph gmx_unused* g,
1366              real gmx_unused lambda,
1367              real gmx_unused* dvdlambda,
1368              const t_mdatoms gmx_unused* md,
1369              t_fcdata gmx_unused* fcd,
1370              int gmx_unused* global_atom_index)
1371 {
1372     constexpr int                            nfa1 = 4;
1373     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1374     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1375     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1376     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1377     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1378
1379     set_pbc_simd(pbc, pbc_simd);
1380
1381     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1382     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1383     {
1384         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1385          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1386          */
1387         int iu = i;
1388         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1389         {
1390             const int type = forceatoms[iu];
1391             ai[s]          = forceatoms[iu + 1];
1392             aj[s]          = forceatoms[iu + 2];
1393             ak[s]          = forceatoms[iu + 3];
1394
1395             /* At the end fill the arrays with the last atoms and 0 params */
1396             if (i + s * nfa1 < nbonds)
1397             {
1398                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1399                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1400                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1401                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1402
1403                 if (iu + nfa1 < nbonds)
1404                 {
1405                     iu += nfa1;
1406                 }
1407             }
1408             else
1409             {
1410                 coeff[s]                           = 0;
1411                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1412                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1413                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1414             }
1415         }
1416
1417         SimdReal xi_S, yi_S, zi_S;
1418         SimdReal xj_S, yj_S, zj_S;
1419         SimdReal xk_S, yk_S, zk_S;
1420
1421         /* Store the non PBC corrected distances packed and aligned */
1422         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1423         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1424         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1425         SimdReal rijx_S = xi_S - xj_S;
1426         SimdReal rijy_S = yi_S - yj_S;
1427         SimdReal rijz_S = zi_S - zj_S;
1428         SimdReal rkjx_S = xk_S - xj_S;
1429         SimdReal rkjy_S = yk_S - yj_S;
1430         SimdReal rkjz_S = zk_S - zj_S;
1431         SimdReal rikx_S = xi_S - xk_S;
1432         SimdReal riky_S = yi_S - yk_S;
1433         SimdReal rikz_S = zi_S - zk_S;
1434
1435         const SimdReal ktheta_S = load<SimdReal>(coeff);
1436         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1437         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1438         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1439
1440         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1441         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1442         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1443
1444         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1445
1446         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1447
1448         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1449         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1450
1451         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1452         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1453         const SimdReal invdr2_S = invsqrt(dr2_S);
1454         const SimdReal dr_S     = dr2_S * invdr2_S;
1455
1456         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1457
1458         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1459          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1460          * This also ensures that rounding errors would cause the argument
1461          * of simdAcos to be < -1.
1462          * Note that we do not take precautions for cos(0)=1, so the bonds
1463          * in an angle should not align at an angle of 0 degrees.
1464          */
1465         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1466
1467         const SimdReal theta_S  = acos(cos_S);
1468         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1469         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1470         const SimdReal sth_S    = st_S * cos_S;
1471
1472         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1473         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1474         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1475
1476         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1477
1478         const SimdReal f_ikx_S = sUB_S * rikx_S;
1479         const SimdReal f_iky_S = sUB_S * riky_S;
1480         const SimdReal f_ikz_S = sUB_S * rikz_S;
1481
1482         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1483         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1484         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1485         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1486         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1487         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1488
1489         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1490         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1491                                  f_iz_S + f_kz_S);
1492         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1493     }
1494
1495     return 0;
1496 }
1497
1498 #endif // GMX_SIMD_HAVE_REAL
1499
1500 template<BondedKernelFlavor flavor>
1501 real quartic_angles(int             nbonds,
1502                     const t_iatom   forceatoms[],
1503                     const t_iparams forceparams[],
1504                     const rvec      x[],
1505                     rvec4           f[],
1506                     rvec            fshift[],
1507                     const t_pbc*    pbc,
1508                     const t_graph*  g,
1509                     real gmx_unused lambda,
1510                     real gmx_unused* dvdlambda,
1511                     const t_mdatoms gmx_unused* md,
1512                     t_fcdata gmx_unused* fcd,
1513                     int gmx_unused* global_atom_index)
1514 {
1515     int  i, j, ai, aj, ak, t1, t2, type;
1516     rvec r_ij, r_kj;
1517     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1518     ivec jt, dt_ij, dt_kj;
1519
1520     vtot = 0.0;
1521     for (i = 0; (i < nbonds);)
1522     {
1523         type = forceatoms[i++];
1524         ai   = forceatoms[i++];
1525         aj   = forceatoms[i++];
1526         ak   = forceatoms[i++];
1527
1528         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1529
1530         dt = theta - forceparams[type].qangle.theta * DEG2RAD; /* 2          */
1531
1532         dVdt = 0;
1533         va   = forceparams[type].qangle.c[0];
1534         dtp  = 1.0;
1535         for (j = 1; j <= 4; j++)
1536         {
1537             c = forceparams[type].qangle.c[j];
1538             dVdt -= j * c * dtp;
1539             dtp *= dt;
1540             va += c * dtp;
1541         }
1542         /* 20 */
1543
1544         vtot += va;
1545
1546         cos_theta2 = gmx::square(cos_theta); /*   1             */
1547         if (cos_theta2 < 1)
1548         {
1549             int  m;
1550             real st, sth;
1551             real cik, cii, ckk;
1552             real nrkj2, nrij2;
1553             rvec f_i, f_j, f_k;
1554
1555             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1556             sth   = st * cos_theta;                      /*   1         */
1557             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1558             nrij2 = iprod(r_ij, r_ij);
1559
1560             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1561             cii = sth / nrij2;                      /*  10              */
1562             ckk = sth / nrkj2;                      /*  10              */
1563
1564             for (m = 0; (m < DIM); m++) /*  39          */
1565             {
1566                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1567                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1568                 f_j[m] = -f_i[m] - f_k[m];
1569                 f[ai][m] += f_i[m];
1570                 f[aj][m] += f_j[m];
1571                 f[ak][m] += f_k[m];
1572             }
1573
1574             if (computeVirial(flavor))
1575             {
1576                 if (g)
1577                 {
1578                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1579
1580                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1581                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1582                     t1 = IVEC2IS(dt_ij);
1583                     t2 = IVEC2IS(dt_kj);
1584                 }
1585                 rvec_inc(fshift[t1], f_i);
1586                 rvec_inc(fshift[CENTRAL], f_j);
1587                 rvec_inc(fshift[t2], f_k);
1588             }
1589         } /* 153 TOTAL  */
1590     }
1591     return vtot;
1592 }
1593
1594
1595 #if GMX_SIMD_HAVE_REAL
1596
1597 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1598  * also calculates the pre-factor required for the dihedral force update.
1599  * Note that bv and buf should be register aligned.
1600  */
1601 inline void dih_angle_simd(const rvec* x,
1602                            const int*  ai,
1603                            const int*  aj,
1604                            const int*  ak,
1605                            const int*  al,
1606                            const real* pbc_simd,
1607                            SimdReal*   phi_S,
1608                            SimdReal*   mx_S,
1609                            SimdReal*   my_S,
1610                            SimdReal*   mz_S,
1611                            SimdReal*   nx_S,
1612                            SimdReal*   ny_S,
1613                            SimdReal*   nz_S,
1614                            SimdReal*   nrkj_m2_S,
1615                            SimdReal*   nrkj_n2_S,
1616                            SimdReal*   p_S,
1617                            SimdReal*   q_S)
1618 {
1619     SimdReal xi_S, yi_S, zi_S;
1620     SimdReal xj_S, yj_S, zj_S;
1621     SimdReal xk_S, yk_S, zk_S;
1622     SimdReal xl_S, yl_S, zl_S;
1623     SimdReal rijx_S, rijy_S, rijz_S;
1624     SimdReal rkjx_S, rkjy_S, rkjz_S;
1625     SimdReal rklx_S, rkly_S, rklz_S;
1626     SimdReal cx_S, cy_S, cz_S;
1627     SimdReal cn_S;
1628     SimdReal s_S;
1629     SimdReal ipr_S;
1630     SimdReal iprm_S, iprn_S;
1631     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1632     SimdReal toler_S;
1633     SimdReal nrkj2_min_S;
1634     SimdReal real_eps_S;
1635
1636     /* Used to avoid division by zero.
1637      * We take into acount that we multiply the result by real_eps_S.
1638      */
1639     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1640
1641     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1642     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1643
1644     /* Store the non PBC corrected distances packed and aligned */
1645     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1646     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1647     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1648     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1649     rijx_S = xi_S - xj_S;
1650     rijy_S = yi_S - yj_S;
1651     rijz_S = zi_S - zj_S;
1652     rkjx_S = xk_S - xj_S;
1653     rkjy_S = yk_S - yj_S;
1654     rkjz_S = zk_S - zj_S;
1655     rklx_S = xk_S - xl_S;
1656     rkly_S = yk_S - yl_S;
1657     rklz_S = zk_S - zl_S;
1658
1659     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1660     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1661     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1662
1663     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1664
1665     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1666
1667     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1668
1669     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1670
1671     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1672
1673     /* Determine the dihedral angle, the sign might need correction */
1674     *phi_S = atan2(cn_S, s_S);
1675
1676     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1677
1678     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1679     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1680
1681     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1682
1683     /* Avoid division by zero. When zero, the result is multiplied by 0
1684      * anyhow, so the 3 max below do not affect the final result.
1685      */
1686     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1687     nrkj_1_S = invsqrt(nrkj2_S);
1688     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1689     nrkj_S   = nrkj2_S * nrkj_1_S;
1690
1691     toler_S = nrkj2_S * real_eps_S;
1692
1693     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1694      * So we take a max with the tolerance instead. Since we multiply with
1695      * m or n later, the max does not affect the results.
1696      */
1697     iprm_S     = max(iprm_S, toler_S);
1698     iprn_S     = max(iprn_S, toler_S);
1699     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1700     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1701
1702     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1703     *phi_S = copysign(*phi_S, ipr_S);
1704     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1705     *p_S   = *p_S * nrkj_2_S;
1706
1707     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1708     *q_S = *q_S * nrkj_2_S;
1709 }
1710
1711 #endif // GMX_SIMD_HAVE_REAL
1712
1713 } // namespace
1714
1715 template<BondedKernelFlavor flavor>
1716 void do_dih_fup(int            i,
1717                 int            j,
1718                 int            k,
1719                 int            l,
1720                 real           ddphi,
1721                 rvec           r_ij,
1722                 rvec           r_kj,
1723                 rvec           r_kl,
1724                 rvec           m,
1725                 rvec           n,
1726                 rvec4          f[],
1727                 rvec           fshift[],
1728                 const t_pbc*   pbc,
1729                 const t_graph* g,
1730                 const rvec     x[],
1731                 int            t1,
1732                 int            t2,
1733                 int            t3)
1734 {
1735     /* 143 FLOPS */
1736     rvec f_i, f_j, f_k, f_l;
1737     rvec uvec, vvec, svec, dx_jl;
1738     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1739     real a, b, p, q, toler;
1740     ivec jt, dt_ij, dt_kj, dt_lj;
1741
1742     iprm  = iprod(m, m);       /*  5    */
1743     iprn  = iprod(n, n);       /*  5    */
1744     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1745     toler = nrkj2 * GMX_REAL_EPS;
1746     if ((iprm > toler) && (iprn > toler))
1747     {
1748         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1749         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1750         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1751         a      = -ddphi * nrkj / iprm; /* 11    */
1752         svmul(a, m, f_i);              /*  3    */
1753         b = ddphi * nrkj / iprn;       /* 11    */
1754         svmul(b, n, f_l);              /*  3  */
1755         p = iprod(r_ij, r_kj);         /*  5    */
1756         p *= nrkj_2;                   /*  1    */
1757         q = iprod(r_kl, r_kj);         /*  5    */
1758         q *= nrkj_2;                   /*  1    */
1759         svmul(p, f_i, uvec);           /*  3    */
1760         svmul(q, f_l, vvec);           /*  3    */
1761         rvec_sub(uvec, vvec, svec);    /*  3    */
1762         rvec_sub(f_i, svec, f_j);      /*  3    */
1763         rvec_add(f_l, svec, f_k);      /*  3    */
1764         rvec_inc(f[i], f_i);           /*  3    */
1765         rvec_dec(f[j], f_j);           /*  3    */
1766         rvec_dec(f[k], f_k);           /*  3    */
1767         rvec_inc(f[l], f_l);           /*  3    */
1768
1769         if (computeVirial(flavor))
1770         {
1771             if (g)
1772             {
1773                 copy_ivec(SHIFT_IVEC(g, j), jt);
1774                 ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1775                 ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1776                 ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1777                 t1 = IVEC2IS(dt_ij);
1778                 t2 = IVEC2IS(dt_kj);
1779                 t3 = IVEC2IS(dt_lj);
1780             }
1781             else if (pbc)
1782             {
1783                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1784             }
1785             else
1786             {
1787                 t3 = CENTRAL;
1788             }
1789
1790             rvec_inc(fshift[t1], f_i);
1791             rvec_dec(fshift[CENTRAL], f_j);
1792             rvec_dec(fshift[t2], f_k);
1793             rvec_inc(fshift[t3], f_l);
1794         }
1795     }
1796     /* 112 TOTAL    */
1797 }
1798
1799 namespace
1800 {
1801
1802 #if GMX_SIMD_HAVE_REAL
1803 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1804 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1805                                                   const int* aj,
1806                                                   const int* ak,
1807                                                   const int* al,
1808                                                   SimdReal   p,
1809                                                   SimdReal   q,
1810                                                   SimdReal   f_i_x,
1811                                                   SimdReal   f_i_y,
1812                                                   SimdReal   f_i_z,
1813                                                   SimdReal   mf_l_x,
1814                                                   SimdReal   mf_l_y,
1815                                                   SimdReal   mf_l_z,
1816                                                   rvec4      f[])
1817 {
1818     SimdReal sx    = p * f_i_x + q * mf_l_x;
1819     SimdReal sy    = p * f_i_y + q * mf_l_y;
1820     SimdReal sz    = p * f_i_z + q * mf_l_z;
1821     SimdReal f_j_x = f_i_x - sx;
1822     SimdReal f_j_y = f_i_y - sy;
1823     SimdReal f_j_z = f_i_z - sz;
1824     SimdReal f_k_x = mf_l_x - sx;
1825     SimdReal f_k_y = mf_l_y - sy;
1826     SimdReal f_k_z = mf_l_z - sz;
1827     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1828     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1829     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1830     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1831 }
1832 #endif // GMX_SIMD_HAVE_REAL
1833
1834 /*! \brief Computes and returns the proper dihedral force
1835  *
1836  * With the appropriate kernel flavor, also computes and accumulates
1837  * the energy and dV/dlambda.
1838  */
1839 template<BondedKernelFlavor flavor>
1840 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1841 {
1842     const real L1   = 1.0 - lambda;
1843     const real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1844     const real dph0 = (phiB - phiA) * DEG2RAD;
1845     const real cp   = L1 * cpA + lambda * cpB;
1846
1847     const real mdphi = mult * phi - ph0;
1848     const real sdphi = std::sin(mdphi);
1849     const real ddphi = -cp * mult * sdphi;
1850     if (computeEnergy(flavor))
1851     {
1852         const real v1 = 1 + std::cos(mdphi);
1853         *V += cp * v1;
1854
1855         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1856     }
1857
1858     return ddphi;
1859     /* That was 40 flops */
1860 }
1861
1862 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1863 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1864 {
1865     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1866     real L1   = 1.0 - lambda;
1867     real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1868     real dph0 = (phiB - phiA) * DEG2RAD;
1869     real cp   = L1 * cpA + lambda * cpB;
1870
1871     mdphi = mult * (phi - ph0);
1872     sdphi = std::sin(mdphi);
1873     ddphi = cp * mult * sdphi;
1874     v1    = 1.0 - std::cos(mdphi);
1875     v     = cp * v1;
1876
1877     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1878
1879     *V = v;
1880     *F = ddphi;
1881
1882     return dvdlambda;
1883
1884     /* That was 40 flops */
1885 }
1886
1887 template<BondedKernelFlavor flavor>
1888 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1889 pdihs(int             nbonds,
1890       const t_iatom   forceatoms[],
1891       const t_iparams forceparams[],
1892       const rvec      x[],
1893       rvec4           f[],
1894       rvec            fshift[],
1895       const t_pbc*    pbc,
1896       const t_graph*  g,
1897       real            lambda,
1898       real*           dvdlambda,
1899       const t_mdatoms gmx_unused* md,
1900       t_fcdata gmx_unused* fcd,
1901       int gmx_unused* global_atom_index)
1902 {
1903     int  t1, t2, t3;
1904     rvec r_ij, r_kj, r_kl, m, n;
1905
1906     real vtot = 0.0;
1907
1908     for (int i = 0; i < nbonds;)
1909     {
1910         const int ai = forceatoms[i + 1];
1911         const int aj = forceatoms[i + 2];
1912         const int ak = forceatoms[i + 3];
1913         const int al = forceatoms[i + 4];
1914
1915         const real phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1,
1916                                    &t2, &t3); /*  84      */
1917
1918         /* Loop over dihedrals working on the same atoms,
1919          * so we avoid recalculating angles and distributing forces.
1920          */
1921         real ddphi_tot = 0;
1922         do
1923         {
1924             const int type = forceatoms[i];
1925             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
1926                                          forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
1927                                          forceparams[type].pdihs.mult, phi, lambda, &vtot, dvdlambda);
1928
1929             i += 5;
1930         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
1931                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
1932
1933         do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
1934                            t1, t2, t3); /* 112          */
1935     }                                   /* 223 TOTAL  */
1936
1937     return vtot;
1938 }
1939
1940 #if GMX_SIMD_HAVE_REAL
1941
1942 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
1943 template<BondedKernelFlavor flavor>
1944 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1945 pdihs(int             nbonds,
1946       const t_iatom   forceatoms[],
1947       const t_iparams forceparams[],
1948       const rvec      x[],
1949       rvec4           f[],
1950       rvec gmx_unused fshift[],
1951       const t_pbc*    pbc,
1952       const t_graph gmx_unused* g,
1953       real gmx_unused lambda,
1954       real gmx_unused* dvdlambda,
1955       const t_mdatoms gmx_unused* md,
1956       t_fcdata gmx_unused* fcd,
1957       int gmx_unused* global_atom_index)
1958 {
1959     const int                                nfa1 = 5;
1960     int                                      i, iu, s;
1961     int                                      type;
1962     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1963     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1964     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1965     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
1966     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
1967     real *                                   cp, *phi0, *mult;
1968     SimdReal                                 deg2rad_S(DEG2RAD);
1969     SimdReal                                 p_S, q_S;
1970     SimdReal                                 phi0_S, phi_S;
1971     SimdReal                                 mx_S, my_S, mz_S;
1972     SimdReal                                 nx_S, ny_S, nz_S;
1973     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
1974     SimdReal                                 cp_S, mdphi_S, mult_S;
1975     SimdReal                                 sin_S, cos_S;
1976     SimdReal                                 mddphi_S;
1977     SimdReal                                 sf_i_S, msf_l_S;
1978     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1979
1980     /* Extract aligned pointer for parameters and variables */
1981     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
1982     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
1983     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
1984
1985     set_pbc_simd(pbc, pbc_simd);
1986
1987     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1988     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1989     {
1990         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1991          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1992          */
1993         iu = i;
1994         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1995         {
1996             type  = forceatoms[iu];
1997             ai[s] = forceatoms[iu + 1];
1998             aj[s] = forceatoms[iu + 2];
1999             ak[s] = forceatoms[iu + 3];
2000             al[s] = forceatoms[iu + 4];
2001
2002             /* At the end fill the arrays with the last atoms and 0 params */
2003             if (i + s * nfa1 < nbonds)
2004             {
2005                 cp[s]   = forceparams[type].pdihs.cpA;
2006                 phi0[s] = forceparams[type].pdihs.phiA;
2007                 mult[s] = forceparams[type].pdihs.mult;
2008
2009                 if (iu + nfa1 < nbonds)
2010                 {
2011                     iu += nfa1;
2012                 }
2013             }
2014             else
2015             {
2016                 cp[s]   = 0;
2017                 phi0[s] = 0;
2018                 mult[s] = 0;
2019             }
2020         }
2021
2022         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2023         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2024                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2025
2026         cp_S   = load<SimdReal>(cp);
2027         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
2028         mult_S = load<SimdReal>(mult);
2029
2030         mdphi_S = fms(mult_S, phi_S, phi0_S);
2031
2032         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2033         sincos(mdphi_S, &sin_S, &cos_S);
2034         mddphi_S = cp_S * mult_S * sin_S;
2035         sf_i_S   = mddphi_S * nrkj_m2_S;
2036         msf_l_S  = mddphi_S * nrkj_n2_S;
2037
2038         /* After this m?_S will contain f[i] */
2039         mx_S = sf_i_S * mx_S;
2040         my_S = sf_i_S * my_S;
2041         mz_S = sf_i_S * mz_S;
2042
2043         /* After this m?_S will contain -f[l] */
2044         nx_S = msf_l_S * nx_S;
2045         ny_S = msf_l_S * ny_S;
2046         nz_S = msf_l_S * nz_S;
2047
2048         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2049     }
2050
2051     return 0;
2052 }
2053
2054 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
2055  * the RB potential instead of a harmonic potential.
2056  * This function can replace rbdihs() when no energy and virial are needed.
2057  */
2058 template<BondedKernelFlavor flavor>
2059 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2060 rbdihs(int             nbonds,
2061        const t_iatom   forceatoms[],
2062        const t_iparams forceparams[],
2063        const rvec      x[],
2064        rvec4           f[],
2065        rvec gmx_unused fshift[],
2066        const t_pbc*    pbc,
2067        const t_graph gmx_unused* g,
2068        real gmx_unused lambda,
2069        real gmx_unused* dvdlambda,
2070        const t_mdatoms gmx_unused* md,
2071        t_fcdata gmx_unused* fcd,
2072        int gmx_unused* global_atom_index)
2073 {
2074     const int                                nfa1 = 5;
2075     int                                      i, iu, s, j;
2076     int                                      type;
2077     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2078     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2079     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2080     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2081     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
2082
2083     SimdReal                         p_S, q_S;
2084     SimdReal                         phi_S;
2085     SimdReal                         ddphi_S, cosfac_S;
2086     SimdReal                         mx_S, my_S, mz_S;
2087     SimdReal                         nx_S, ny_S, nz_S;
2088     SimdReal                         nrkj_m2_S, nrkj_n2_S;
2089     SimdReal                         parm_S, c_S;
2090     SimdReal                         sin_S, cos_S;
2091     SimdReal                         sf_i_S, msf_l_S;
2092     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2093
2094     SimdReal pi_S(M_PI);
2095     SimdReal one_S(1.0);
2096
2097     set_pbc_simd(pbc, pbc_simd);
2098
2099     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2100     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2101     {
2102         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2103          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2104          */
2105         iu = i;
2106         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2107         {
2108             type  = forceatoms[iu];
2109             ai[s] = forceatoms[iu + 1];
2110             aj[s] = forceatoms[iu + 2];
2111             ak[s] = forceatoms[iu + 3];
2112             al[s] = forceatoms[iu + 4];
2113
2114             /* At the end fill the arrays with the last atoms and 0 params */
2115             if (i + s * nfa1 < nbonds)
2116             {
2117                 /* We don't need the first parameter, since that's a constant
2118                  * which only affects the energies, not the forces.
2119                  */
2120                 for (j = 1; j < NR_RBDIHS; j++)
2121                 {
2122                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2123                 }
2124
2125                 if (iu + nfa1 < nbonds)
2126                 {
2127                     iu += nfa1;
2128                 }
2129             }
2130             else
2131             {
2132                 for (j = 1; j < NR_RBDIHS; j++)
2133                 {
2134                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2135                 }
2136             }
2137         }
2138
2139         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2140         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2141                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2142
2143         /* Change to polymer convention */
2144         phi_S = phi_S - pi_S;
2145
2146         sincos(phi_S, &sin_S, &cos_S);
2147
2148         ddphi_S  = setZero();
2149         c_S      = one_S;
2150         cosfac_S = one_S;
2151         for (j = 1; j < NR_RBDIHS; j++)
2152         {
2153             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2154             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2155             cosfac_S = cosfac_S * cos_S;
2156             c_S      = c_S + one_S;
2157         }
2158
2159         /* Note that here we do not use the minus sign which is present
2160          * in the normal RB code. This is corrected for through (m)sf below.
2161          */
2162         ddphi_S = ddphi_S * sin_S;
2163
2164         sf_i_S  = ddphi_S * nrkj_m2_S;
2165         msf_l_S = ddphi_S * nrkj_n2_S;
2166
2167         /* After this m?_S will contain f[i] */
2168         mx_S = sf_i_S * mx_S;
2169         my_S = sf_i_S * my_S;
2170         mz_S = sf_i_S * mz_S;
2171
2172         /* After this m?_S will contain -f[l] */
2173         nx_S = msf_l_S * nx_S;
2174         ny_S = msf_l_S * ny_S;
2175         nz_S = msf_l_S * nz_S;
2176
2177         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2178     }
2179
2180     return 0;
2181 }
2182
2183 #endif // GMX_SIMD_HAVE_REAL
2184
2185
2186 template<BondedKernelFlavor flavor>
2187 real idihs(int             nbonds,
2188            const t_iatom   forceatoms[],
2189            const t_iparams forceparams[],
2190            const rvec      x[],
2191            rvec4           f[],
2192            rvec            fshift[],
2193            const t_pbc*    pbc,
2194            const t_graph*  g,
2195            real            lambda,
2196            real*           dvdlambda,
2197            const t_mdatoms gmx_unused* md,
2198            t_fcdata gmx_unused* fcd,
2199            int gmx_unused* global_atom_index)
2200 {
2201     int  i, type, ai, aj, ak, al;
2202     int  t1, t2, t3;
2203     real phi, phi0, dphi0, ddphi, vtot;
2204     rvec r_ij, r_kj, r_kl, m, n;
2205     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2206
2207     L1        = 1.0 - lambda;
2208     dvdl_term = 0;
2209     vtot      = 0.0;
2210     for (i = 0; (i < nbonds);)
2211     {
2212         type = forceatoms[i++];
2213         ai   = forceatoms[i++];
2214         aj   = forceatoms[i++];
2215         ak   = forceatoms[i++];
2216         al   = forceatoms[i++];
2217
2218         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2219
2220         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2221          * force changes if we just apply a normal harmonic.
2222          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2223          * This means we will never have the periodicity problem, unless
2224          * the dihedral is Pi away from phiO, which is very unlikely due to
2225          * the potential.
2226          */
2227         kA = forceparams[type].harmonic.krA;
2228         kB = forceparams[type].harmonic.krB;
2229         pA = forceparams[type].harmonic.rA;
2230         pB = forceparams[type].harmonic.rB;
2231
2232         kk    = L1 * kA + lambda * kB;
2233         phi0  = (L1 * pA + lambda * pB) * DEG2RAD;
2234         dphi0 = (pB - pA) * DEG2RAD;
2235
2236         dp = phi - phi0;
2237
2238         make_dp_periodic(&dp);
2239
2240         dp2 = dp * dp;
2241
2242         vtot += 0.5 * kk * dp2;
2243         ddphi = -kk * dp;
2244
2245         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2246
2247         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
2248                            t2, t3); /* 112              */
2249         /* 218 TOTAL    */
2250     }
2251
2252     *dvdlambda += dvdl_term;
2253     return vtot;
2254 }
2255
2256 /*! \brief Computes angle restraints of two different types */
2257 template<BondedKernelFlavor flavor>
2258 real low_angres(int             nbonds,
2259                 const t_iatom   forceatoms[],
2260                 const t_iparams forceparams[],
2261                 const rvec      x[],
2262                 rvec4           f[],
2263                 rvec            fshift[],
2264                 const t_pbc*    pbc,
2265                 const t_graph*  g,
2266                 real            lambda,
2267                 real*           dvdlambda,
2268                 gmx_bool        bZAxis)
2269 {
2270     int  i, m, type, ai, aj, ak, al;
2271     int  t1, t2;
2272     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2273     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2274     real st, sth, nrij2, nrkl2, c, cij, ckl;
2275
2276     ivec dt;
2277     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2278
2279     vtot = 0.0;
2280     ak = al = 0; /* to avoid warnings */
2281     for (i = 0; i < nbonds;)
2282     {
2283         type = forceatoms[i++];
2284         ai   = forceatoms[i++];
2285         aj   = forceatoms[i++];
2286         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2287         if (!bZAxis)
2288         {
2289             ak = forceatoms[i++];
2290             al = forceatoms[i++];
2291             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2292         }
2293         else
2294         {
2295             r_kl[XX] = 0;
2296             r_kl[YY] = 0;
2297             r_kl[ZZ] = 1;
2298         }
2299
2300         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2301         phi     = std::acos(cos_phi);    /* 10           */
2302
2303         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
2304                                   forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
2305                                   forceparams[type].pdihs.mult, phi, lambda, &vid, &dVdphi); /*  40 */
2306
2307         vtot += vid;
2308
2309         cos_phi2 = gmx::square(cos_phi); /*   1         */
2310         if (cos_phi2 < 1)
2311         {
2312             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2313             sth   = st * cos_phi;                         /*   1                */
2314             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2315             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2316
2317             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2318             cij = sth / nrij2;                      /*  10              */
2319             ckl = sth / nrkl2;                      /*  10              */
2320
2321             for (m = 0; m < DIM; m++) /*  18+18       */
2322             {
2323                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2324                 f[ai][m] += f_i[m];
2325                 f[aj][m] -= f_i[m];
2326                 if (!bZAxis)
2327                 {
2328                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2329                     f[ak][m] += f_k[m];
2330                     f[al][m] -= f_k[m];
2331                 }
2332             }
2333
2334             if (computeVirial(flavor))
2335             {
2336                 if (g)
2337                 {
2338                     ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2339                     t1 = IVEC2IS(dt);
2340                 }
2341                 rvec_inc(fshift[t1], f_i);
2342                 rvec_dec(fshift[CENTRAL], f_i);
2343                 if (!bZAxis)
2344                 {
2345                     if (g)
2346                     {
2347                         ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2348                         t2 = IVEC2IS(dt);
2349                     }
2350                     rvec_inc(fshift[t2], f_k);
2351                     rvec_dec(fshift[CENTRAL], f_k);
2352                 }
2353             }
2354         }
2355     }
2356
2357     return vtot; /*  184 / 157 (bZAxis)  total  */
2358 }
2359
2360 template<BondedKernelFlavor flavor>
2361 real angres(int             nbonds,
2362             const t_iatom   forceatoms[],
2363             const t_iparams forceparams[],
2364             const rvec      x[],
2365             rvec4           f[],
2366             rvec            fshift[],
2367             const t_pbc*    pbc,
2368             const t_graph*  g,
2369             real            lambda,
2370             real*           dvdlambda,
2371             const t_mdatoms gmx_unused* md,
2372             t_fcdata gmx_unused* fcd,
2373             int gmx_unused* global_atom_index)
2374 {
2375     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
2376                               dvdlambda, FALSE);
2377 }
2378
2379 template<BondedKernelFlavor flavor>
2380 real angresz(int             nbonds,
2381              const t_iatom   forceatoms[],
2382              const t_iparams forceparams[],
2383              const rvec      x[],
2384              rvec4           f[],
2385              rvec            fshift[],
2386              const t_pbc*    pbc,
2387              const t_graph*  g,
2388              real            lambda,
2389              real*           dvdlambda,
2390              const t_mdatoms gmx_unused* md,
2391              t_fcdata gmx_unused* fcd,
2392              int gmx_unused* global_atom_index)
2393 {
2394     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
2395                               dvdlambda, TRUE);
2396 }
2397
2398 template<BondedKernelFlavor flavor>
2399 real dihres(int             nbonds,
2400             const t_iatom   forceatoms[],
2401             const t_iparams forceparams[],
2402             const rvec      x[],
2403             rvec4           f[],
2404             rvec            fshift[],
2405             const t_pbc*    pbc,
2406             const t_graph*  g,
2407             real            lambda,
2408             real*           dvdlambda,
2409             const t_mdatoms gmx_unused* md,
2410             t_fcdata gmx_unused* fcd,
2411             int gmx_unused* global_atom_index)
2412 {
2413     real vtot = 0;
2414     int  ai, aj, ak, al, i, type, t1, t2, t3;
2415     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2416     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2417     rvec r_ij, r_kj, r_kl, m, n;
2418
2419     L1 = 1.0 - lambda;
2420
2421     d2r = DEG2RAD;
2422
2423     for (i = 0; (i < nbonds);)
2424     {
2425         type = forceatoms[i++];
2426         ai   = forceatoms[i++];
2427         aj   = forceatoms[i++];
2428         ak   = forceatoms[i++];
2429         al   = forceatoms[i++];
2430
2431         phi0A = forceparams[type].dihres.phiA * d2r;
2432         dphiA = forceparams[type].dihres.dphiA * d2r;
2433         kfacA = forceparams[type].dihres.kfacA;
2434
2435         phi0B = forceparams[type].dihres.phiB * d2r;
2436         dphiB = forceparams[type].dihres.dphiB * d2r;
2437         kfacB = forceparams[type].dihres.kfacB;
2438
2439         phi0 = L1 * phi0A + lambda * phi0B;
2440         dphi = L1 * dphiA + lambda * dphiB;
2441         kfac = L1 * kfacA + lambda * kfacB;
2442
2443         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2444         /* 84 flops */
2445
2446         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2447          * force changes if we just apply a normal harmonic.
2448          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2449          * This means we will never have the periodicity problem, unless
2450          * the dihedral is Pi away from phiO, which is very unlikely due to
2451          * the potential.
2452          */
2453         dp = phi - phi0;
2454         make_dp_periodic(&dp);
2455
2456         if (dp > dphi)
2457         {
2458             ddp = dp - dphi;
2459         }
2460         else if (dp < -dphi)
2461         {
2462             ddp = dp + dphi;
2463         }
2464         else
2465         {
2466             ddp = 0;
2467         }
2468
2469         if (ddp != 0.0)
2470         {
2471             ddp2 = ddp * ddp;
2472             vtot += 0.5 * kfac * ddp2;
2473             ddphi = kfac * ddp;
2474
2475             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2476             /* lambda dependence from changing restraint distances */
2477             if (ddp > 0)
2478             {
2479                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2480             }
2481             else if (ddp < 0)
2482             {
2483                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2484             }
2485             do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
2486                                t1, t2, t3); /* 112              */
2487         }
2488     }
2489     return vtot;
2490 }
2491
2492
2493 real unimplemented(int gmx_unused nbonds,
2494                    const t_iatom gmx_unused forceatoms[],
2495                    const t_iparams gmx_unused forceparams[],
2496                    const rvec gmx_unused x[],
2497                    rvec4 gmx_unused f[],
2498                    rvec gmx_unused fshift[],
2499                    const t_pbc gmx_unused* pbc,
2500                    const t_graph gmx_unused* g,
2501                    real gmx_unused lambda,
2502                    real gmx_unused* dvdlambda,
2503                    const t_mdatoms gmx_unused* md,
2504                    t_fcdata gmx_unused* fcd,
2505                    int gmx_unused* global_atom_index)
2506 {
2507     gmx_impl("*** you are using a not implemented function");
2508 }
2509
2510 template<BondedKernelFlavor flavor>
2511 real restrangles(int             nbonds,
2512                  const t_iatom   forceatoms[],
2513                  const t_iparams forceparams[],
2514                  const rvec      x[],
2515                  rvec4           f[],
2516                  rvec            fshift[],
2517                  const t_pbc*    pbc,
2518                  const t_graph*  g,
2519                  real gmx_unused lambda,
2520                  real gmx_unused* dvdlambda,
2521                  const t_mdatoms gmx_unused* md,
2522                  t_fcdata gmx_unused* fcd,
2523                  int gmx_unused* global_atom_index)
2524 {
2525     int    i, d, ai, aj, ak, type, m;
2526     int    t1, t2;
2527     real   v, vtot;
2528     ivec   jt, dt_ij, dt_kj;
2529     rvec   f_i, f_j, f_k;
2530     double prefactor, ratio_ante, ratio_post;
2531     rvec   delta_ante, delta_post, vec_temp;
2532
2533     vtot = 0.0;
2534     for (i = 0; (i < nbonds);)
2535     {
2536         type = forceatoms[i++];
2537         ai   = forceatoms[i++];
2538         aj   = forceatoms[i++];
2539         ak   = forceatoms[i++];
2540
2541         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2542         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2543         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2544
2545
2546         /* This function computes factors needed for restricted angle potential.
2547          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2548          * when three particles align and the dihedral angle and dihedral potential
2549          * cannot be calculated. This potential is calculated using the formula:
2550            real restrangles(int nbonds,
2551             const t_iatom forceatoms[],const t_iparams forceparams[],
2552             const rvec x[],rvec4 f[],rvec fshift[],
2553             const t_pbc *pbc,const t_graph *g,
2554             real gmx_unused lambda,real gmx_unused *dvdlambda,
2555             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2556             int gmx_unused *global_atom_index)
2557            {
2558            int  i, d, ai, aj, ak, type, m;
2559            int t1, t2;
2560            rvec r_ij,r_kj;
2561            real v, vtot;
2562            ivec jt,dt_ij,dt_kj;
2563            rvec f_i, f_j, f_k;
2564            real prefactor, ratio_ante, ratio_post;
2565            rvec delta_ante, delta_post, vec_temp;
2566
2567            vtot = 0.0;
2568            for(i=0; (i<nbonds); )
2569            {
2570            type = forceatoms[i++];
2571            ai   = forceatoms[i++];
2572            aj   = forceatoms[i++];
2573            ak   = forceatoms[i++];
2574
2575          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2576          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2577          * For more explanations see comments file "restcbt.h". */
2578
2579         compute_factors_restangles(type, forceparams, delta_ante, delta_post, &prefactor,
2580                                    &ratio_ante, &ratio_post, &v);
2581
2582         /*   Forces are computed per component */
2583         for (d = 0; d < DIM; d++)
2584         {
2585             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2586             f_j[d] = prefactor
2587                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2588             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2589         }
2590
2591         /*   Computation of potential energy   */
2592
2593         vtot += v;
2594
2595         /*   Update forces */
2596
2597         for (m = 0; (m < DIM); m++)
2598         {
2599             f[ai][m] += f_i[m];
2600             f[aj][m] += f_j[m];
2601             f[ak][m] += f_k[m];
2602         }
2603
2604         if (computeVirial(flavor))
2605         {
2606             if (g)
2607             {
2608                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2609                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2610                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2611                 t1 = IVEC2IS(dt_ij);
2612                 t2 = IVEC2IS(dt_kj);
2613             }
2614
2615             rvec_inc(fshift[t1], f_i);
2616             rvec_inc(fshift[CENTRAL], f_j);
2617             rvec_inc(fshift[t2], f_k);
2618         }
2619     }
2620     return vtot;
2621 }
2622
2623
2624 template<BondedKernelFlavor flavor>
2625 real restrdihs(int             nbonds,
2626                const t_iatom   forceatoms[],
2627                const t_iparams forceparams[],
2628                const rvec      x[],
2629                rvec4           f[],
2630                rvec            fshift[],
2631                const t_pbc*    pbc,
2632                const t_graph*  g,
2633                real gmx_unused lambda,
2634                real gmx_unused* dvlambda,
2635                const t_mdatoms gmx_unused* md,
2636                t_fcdata gmx_unused* fcd,
2637                int gmx_unused* global_atom_index)
2638 {
2639     int  i, d, type, ai, aj, ak, al;
2640     rvec f_i, f_j, f_k, f_l;
2641     rvec dx_jl;
2642     ivec jt, dt_ij, dt_kj, dt_lj;
2643     int  t1, t2, t3;
2644     real v, vtot;
2645     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2646     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2647     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2648     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2649     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2650     real prefactor_phi;
2651
2652
2653     vtot = 0.0;
2654     for (i = 0; (i < nbonds);)
2655     {
2656         type = forceatoms[i++];
2657         ai   = forceatoms[i++];
2658         aj   = forceatoms[i++];
2659         ak   = forceatoms[i++];
2660         al   = forceatoms[i++];
2661
2662         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2663         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2664         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2665         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2666         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2667
2668         /* This function computes factors needed for restricted angle potential.
2669          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2670          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2671          * This potential is calculated using the formula:
2672          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2673          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2674          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2675          * For more explanations see comments file "restcbt.h" */
2676
2677         compute_factors_restrdihs(
2678                 type, forceparams, delta_ante, delta_crnt, delta_post, &factor_phi_ai_ante,
2679                 &factor_phi_ai_crnt, &factor_phi_ai_post, &factor_phi_aj_ante, &factor_phi_aj_crnt,
2680                 &factor_phi_aj_post, &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2681                 &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post, &prefactor_phi, &v);
2682
2683
2684         /*      Computation of forces per component */
2685         for (d = 0; d < DIM; d++)
2686         {
2687             f_i[d] = prefactor_phi
2688                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2689                         + factor_phi_ai_post * delta_post[d]);
2690             f_j[d] = prefactor_phi
2691                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2692                         + factor_phi_aj_post * delta_post[d]);
2693             f_k[d] = prefactor_phi
2694                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2695                         + factor_phi_ak_post * delta_post[d]);
2696             f_l[d] = prefactor_phi
2697                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2698                         + factor_phi_al_post * delta_post[d]);
2699         }
2700         /*      Computation of the energy */
2701
2702         vtot += v;
2703
2704
2705         /*    Updating the forces */
2706
2707         rvec_inc(f[ai], f_i);
2708         rvec_inc(f[aj], f_j);
2709         rvec_inc(f[ak], f_k);
2710         rvec_inc(f[al], f_l);
2711
2712         if (computeVirial(flavor))
2713         {
2714             if (g)
2715             {
2716                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2717                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2718                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2719                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2720                 t1 = IVEC2IS(dt_ij);
2721                 t2 = IVEC2IS(dt_kj);
2722                 t3 = IVEC2IS(dt_lj);
2723             }
2724             else if (pbc)
2725             {
2726                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2727             }
2728             else
2729             {
2730                 t3 = CENTRAL;
2731             }
2732
2733             rvec_inc(fshift[t1], f_i);
2734             rvec_inc(fshift[CENTRAL], f_j);
2735             rvec_inc(fshift[t2], f_k);
2736             rvec_inc(fshift[t3], f_l);
2737         }
2738     }
2739
2740     return vtot;
2741 }
2742
2743
2744 template<BondedKernelFlavor flavor>
2745 real cbtdihs(int             nbonds,
2746              const t_iatom   forceatoms[],
2747              const t_iparams forceparams[],
2748              const rvec      x[],
2749              rvec4           f[],
2750              rvec            fshift[],
2751              const t_pbc*    pbc,
2752              const t_graph*  g,
2753              real gmx_unused lambda,
2754              real gmx_unused* dvdlambda,
2755              const t_mdatoms gmx_unused* md,
2756              t_fcdata gmx_unused* fcd,
2757              int gmx_unused* global_atom_index)
2758 {
2759     int  type, ai, aj, ak, al, i, d;
2760     int  t1, t2, t3;
2761     real v, vtot;
2762     rvec vec_temp;
2763     rvec f_i, f_j, f_k, f_l;
2764     ivec jt, dt_ij, dt_kj, dt_lj;
2765     rvec dx_jl;
2766     rvec delta_ante, delta_crnt, delta_post;
2767     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2768     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2769     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2770
2771
2772     vtot = 0.0;
2773     for (i = 0; (i < nbonds);)
2774     {
2775         type = forceatoms[i++];
2776         ai   = forceatoms[i++];
2777         aj   = forceatoms[i++];
2778         ak   = forceatoms[i++];
2779         al   = forceatoms[i++];
2780
2781
2782         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2783         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2784         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2785         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2786         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2787         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2788
2789         /* \brief Compute factors for CBT potential
2790          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2791          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2792          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2793          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2794          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2795          * It contains in its expression not only the dihedral angle \f$\phi\f$
2796          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2797          * --- the adjacent bending angles.
2798          * For more explanations see comments file "restcbt.h". */
2799
2800         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post, f_phi_ai,
2801                                 f_phi_aj, f_phi_ak, f_phi_al, f_theta_ante_ai, f_theta_ante_aj,
2802                                 f_theta_ante_ak, f_theta_post_aj, f_theta_post_ak, f_theta_post_al, &v);
2803
2804
2805         /*      Acumulate the resuts per beads */
2806         for (d = 0; d < DIM; d++)
2807         {
2808             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2809             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2810             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2811             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2812         }
2813
2814         /*      Compute the potential energy */
2815
2816         vtot += v;
2817
2818
2819         /*  Updating the forces */
2820         rvec_inc(f[ai], f_i);
2821         rvec_inc(f[aj], f_j);
2822         rvec_inc(f[ak], f_k);
2823         rvec_inc(f[al], f_l);
2824
2825
2826         if (computeVirial(flavor))
2827         {
2828             if (g)
2829             {
2830                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2831                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2832                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2833                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2834                 t1 = IVEC2IS(dt_ij);
2835                 t2 = IVEC2IS(dt_kj);
2836                 t3 = IVEC2IS(dt_lj);
2837             }
2838             else if (pbc)
2839             {
2840                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2841             }
2842             else
2843             {
2844                 t3 = CENTRAL;
2845             }
2846
2847             rvec_inc(fshift[t1], f_i);
2848             rvec_inc(fshift[CENTRAL], f_j);
2849             rvec_inc(fshift[t2], f_k);
2850             rvec_inc(fshift[t3], f_l);
2851         }
2852     }
2853
2854     return vtot;
2855 }
2856
2857 template<BondedKernelFlavor flavor>
2858 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2859 rbdihs(int             nbonds,
2860        const t_iatom   forceatoms[],
2861        const t_iparams forceparams[],
2862        const rvec      x[],
2863        rvec4           f[],
2864        rvec            fshift[],
2865        const t_pbc*    pbc,
2866        const t_graph*  g,
2867        real            lambda,
2868        real*           dvdlambda,
2869        const t_mdatoms gmx_unused* md,
2870        t_fcdata gmx_unused* fcd,
2871        int gmx_unused* global_atom_index)
2872 {
2873     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2874     int        type, ai, aj, ak, al, i, j;
2875     int        t1, t2, t3;
2876     rvec       r_ij, r_kj, r_kl, m, n;
2877     real       parmA[NR_RBDIHS];
2878     real       parmB[NR_RBDIHS];
2879     real       parm[NR_RBDIHS];
2880     real       cos_phi, phi, rbp, rbpBA;
2881     real       v, ddphi, sin_phi;
2882     real       cosfac, vtot;
2883     real       L1        = 1.0 - lambda;
2884     real       dvdl_term = 0;
2885
2886     vtot = 0.0;
2887     for (i = 0; (i < nbonds);)
2888     {
2889         type = forceatoms[i++];
2890         ai   = forceatoms[i++];
2891         aj   = forceatoms[i++];
2892         ak   = forceatoms[i++];
2893         al   = forceatoms[i++];
2894
2895         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2896
2897         /* Change to polymer convention */
2898         if (phi < c0)
2899         {
2900             phi += M_PI;
2901         }
2902         else
2903         {
2904             phi -= M_PI; /*   1         */
2905         }
2906         cos_phi = std::cos(phi);
2907         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2908         sin_phi = std::sin(phi);
2909
2910         for (j = 0; (j < NR_RBDIHS); j++)
2911         {
2912             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2913             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2914             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2915         }
2916         /* Calculate cosine powers */
2917         /* Calculate the energy */
2918         /* Calculate the derivative */
2919
2920         v = parm[0];
2921         dvdl_term += (parmB[0] - parmA[0]);
2922         ddphi  = c0;
2923         cosfac = c1;
2924
2925         rbp   = parm[1];
2926         rbpBA = parmB[1] - parmA[1];
2927         ddphi += rbp * cosfac;
2928         cosfac *= cos_phi;
2929         v += cosfac * rbp;
2930         dvdl_term += cosfac * rbpBA;
2931         rbp   = parm[2];
2932         rbpBA = parmB[2] - parmA[2];
2933         ddphi += c2 * rbp * cosfac;
2934         cosfac *= cos_phi;
2935         v += cosfac * rbp;
2936         dvdl_term += cosfac * rbpBA;
2937         rbp   = parm[3];
2938         rbpBA = parmB[3] - parmA[3];
2939         ddphi += c3 * rbp * cosfac;
2940         cosfac *= cos_phi;
2941         v += cosfac * rbp;
2942         dvdl_term += cosfac * rbpBA;
2943         rbp   = parm[4];
2944         rbpBA = parmB[4] - parmA[4];
2945         ddphi += c4 * rbp * cosfac;
2946         cosfac *= cos_phi;
2947         v += cosfac * rbp;
2948         dvdl_term += cosfac * rbpBA;
2949         rbp   = parm[5];
2950         rbpBA = parmB[5] - parmA[5];
2951         ddphi += c5 * rbp * cosfac;
2952         cosfac *= cos_phi;
2953         v += cosfac * rbp;
2954         dvdl_term += cosfac * rbpBA;
2955
2956         ddphi = -ddphi * sin_phi; /*  11                */
2957
2958         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
2959                            t2, t3); /* 112              */
2960         vtot += v;
2961     }
2962     *dvdlambda += dvdl_term;
2963
2964     return vtot;
2965 }
2966
2967 //! \endcond
2968
2969 /*! \brief Mysterious undocumented function */
2970 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
2971 {
2972     int im1, ip1, ip2;
2973
2974     if (ip < 0)
2975     {
2976         ip = ip + grid_spacing - 1;
2977     }
2978     else if (ip > grid_spacing)
2979     {
2980         ip = ip - grid_spacing - 1;
2981     }
2982
2983     im1 = ip - 1;
2984     ip1 = ip + 1;
2985     ip2 = ip + 2;
2986
2987     if (ip == 0)
2988     {
2989         im1 = grid_spacing - 1;
2990     }
2991     else if (ip == grid_spacing - 2)
2992     {
2993         ip2 = 0;
2994     }
2995     else if (ip == grid_spacing - 1)
2996     {
2997         ip1 = 0;
2998         ip2 = 1;
2999     }
3000
3001     *ipm1 = im1;
3002     *ipp1 = ip1;
3003     *ipp2 = ip2;
3004
3005     return ip;
3006 }
3007
3008 } // namespace
3009
3010 real cmap_dihs(int                   nbonds,
3011                const t_iatom         forceatoms[],
3012                const t_iparams       forceparams[],
3013                const gmx_cmap_t*     cmap_grid,
3014                const rvec            x[],
3015                rvec4                 f[],
3016                rvec                  fshift[],
3017                const struct t_pbc*   pbc,
3018                const struct t_graph* g,
3019                real gmx_unused lambda,
3020                real gmx_unused* dvdlambda,
3021                const t_mdatoms gmx_unused* md,
3022                t_fcdata gmx_unused* fcd,
3023                int gmx_unused* global_atom_index)
3024 {
3025     int i, n;
3026     int ai, aj, ak, al, am;
3027     int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
3028     int type;
3029     int t11, t21, t31, t12, t22, t32;
3030     int iphi1, ip1m1, ip1p1, ip1p2;
3031     int iphi2, ip2m1, ip2p1, ip2p2;
3032     int l1, l2, l3;
3033     int pos1, pos2, pos3, pos4;
3034
3035     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
3036     real phi1, cos_phi1, sin_phi1, xphi1;
3037     real phi2, cos_phi2, sin_phi2, xphi2;
3038     real dx, tt, tu, e, df1, df2, vtot;
3039     real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
3040     real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
3041     real fg1, hg1, fga1, hgb1, gaa1, gbb1;
3042     real fg2, hg2, fga2, hgb2, gaa2, gbb2;
3043     real fac;
3044
3045     rvec r1_ij, r1_kj, r1_kl, m1, n1;
3046     rvec r2_ij, r2_kj, r2_kl, m2, n2;
3047     rvec f1_i, f1_j, f1_k, f1_l;
3048     rvec f2_i, f2_j, f2_k, f2_l;
3049     rvec a1, b1, a2, b2;
3050     rvec f1, g1, h1, f2, g2, h2;
3051     rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
3052     ivec jt1, dt1_ij, dt1_kj, dt1_lj;
3053     ivec jt2, dt2_ij, dt2_kj, dt2_lj;
3054
3055     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
3056
3057     /* Total CMAP energy */
3058     vtot = 0;
3059
3060     for (n = 0; n < nbonds;)
3061     {
3062         /* Five atoms are involved in the two torsions */
3063         type = forceatoms[n++];
3064         ai   = forceatoms[n++];
3065         aj   = forceatoms[n++];
3066         ak   = forceatoms[n++];
3067         al   = forceatoms[n++];
3068         am   = forceatoms[n++];
3069
3070         /* Which CMAP type is this */
3071         const int   cmapA = forceparams[type].cmap.cmapA;
3072         const real* cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
3073
3074         /* First torsion */
3075         a1i = ai;
3076         a1j = aj;
3077         a1k = ak;
3078         a1l = al;
3079
3080         phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11,
3081                          &t21, &t31); /* 84 */
3082
3083         cos_phi1 = std::cos(phi1);
3084
3085         a1[0] = r1_ij[1] * r1_kj[2] - r1_ij[2] * r1_kj[1];
3086         a1[1] = r1_ij[2] * r1_kj[0] - r1_ij[0] * r1_kj[2];
3087         a1[2] = r1_ij[0] * r1_kj[1] - r1_ij[1] * r1_kj[0]; /* 9 */
3088
3089         b1[0] = r1_kl[1] * r1_kj[2] - r1_kl[2] * r1_kj[1];
3090         b1[1] = r1_kl[2] * r1_kj[0] - r1_kl[0] * r1_kj[2];
3091         b1[2] = r1_kl[0] * r1_kj[1] - r1_kl[1] * r1_kj[0]; /* 9 */
3092
3093         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3094
3095         ra21 = iprod(a1, a1);       /* 5 */
3096         rb21 = iprod(b1, b1);       /* 5 */
3097         rg21 = iprod(r1_kj, r1_kj); /* 5 */
3098         rg1  = sqrt(rg21);
3099
3100         rgr1  = 1.0 / rg1;
3101         ra2r1 = 1.0 / ra21;
3102         rb2r1 = 1.0 / rb21;
3103         rabr1 = sqrt(ra2r1 * rb2r1);
3104
3105         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3106
3107         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3108         {
3109             phi1 = std::asin(sin_phi1);
3110
3111             if (cos_phi1 < 0)
3112             {
3113                 if (phi1 > 0)
3114                 {
3115                     phi1 = M_PI - phi1;
3116                 }
3117                 else
3118                 {
3119                     phi1 = -M_PI - phi1;
3120                 }
3121             }
3122         }
3123         else
3124         {
3125             phi1 = std::acos(cos_phi1);
3126
3127             if (sin_phi1 < 0)
3128             {
3129                 phi1 = -phi1;
3130             }
3131         }
3132
3133         xphi1 = phi1 + M_PI; /* 1 */
3134
3135         /* Second torsion */
3136         a2i = aj;
3137         a2j = ak;
3138         a2k = al;
3139         a2l = am;
3140
3141         phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12,
3142                          &t22, &t32); /* 84 */
3143
3144         cos_phi2 = std::cos(phi2);
3145
3146         a2[0] = r2_ij[1] * r2_kj[2] - r2_ij[2] * r2_kj[1];
3147         a2[1] = r2_ij[2] * r2_kj[0] - r2_ij[0] * r2_kj[2];
3148         a2[2] = r2_ij[0] * r2_kj[1] - r2_ij[1] * r2_kj[0]; /* 9 */
3149
3150         b2[0] = r2_kl[1] * r2_kj[2] - r2_kl[2] * r2_kj[1];
3151         b2[1] = r2_kl[2] * r2_kj[0] - r2_kl[0] * r2_kj[2];
3152         b2[2] = r2_kl[0] * r2_kj[1] - r2_kl[1] * r2_kj[0]; /* 9 */
3153
3154         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3155
3156         ra22 = iprod(a2, a2);       /* 5 */
3157         rb22 = iprod(b2, b2);       /* 5 */
3158         rg22 = iprod(r2_kj, r2_kj); /* 5 */
3159         rg2  = sqrt(rg22);
3160
3161         rgr2  = 1.0 / rg2;
3162         ra2r2 = 1.0 / ra22;
3163         rb2r2 = 1.0 / rb22;
3164         rabr2 = sqrt(ra2r2 * rb2r2);
3165
3166         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3167
3168         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3169         {
3170             phi2 = std::asin(sin_phi2);
3171
3172             if (cos_phi2 < 0)
3173             {
3174                 if (phi2 > 0)
3175                 {
3176                     phi2 = M_PI - phi2;
3177                 }
3178                 else
3179                 {
3180                     phi2 = -M_PI - phi2;
3181                 }
3182             }
3183         }
3184         else
3185         {
3186             phi2 = std::acos(cos_phi2);
3187
3188             if (sin_phi2 < 0)
3189             {
3190                 phi2 = -phi2;
3191             }
3192         }
3193
3194         xphi2 = phi2 + M_PI; /* 1 */
3195
3196         /* Range mangling */
3197         if (xphi1 < 0)
3198         {
3199             xphi1 = xphi1 + 2 * M_PI;
3200         }
3201         else if (xphi1 >= 2 * M_PI)
3202         {
3203             xphi1 = xphi1 - 2 * M_PI;
3204         }
3205
3206         if (xphi2 < 0)
3207         {
3208             xphi2 = xphi2 + 2 * M_PI;
3209         }
3210         else if (xphi2 >= 2 * M_PI)
3211         {
3212             xphi2 = xphi2 - 2 * M_PI;
3213         }
3214
3215         /* Number of grid points */
3216         dx = 2 * M_PI / cmap_grid->grid_spacing;
3217
3218         /* Where on the grid are we */
3219         iphi1 = static_cast<int>(xphi1 / dx);
3220         iphi2 = static_cast<int>(xphi2 / dx);
3221
3222         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3223         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3224
3225         pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3226         pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3227         pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3228         pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3229
3230         ty[0] = cmapd[pos1 * 4];
3231         ty[1] = cmapd[pos2 * 4];
3232         ty[2] = cmapd[pos3 * 4];
3233         ty[3] = cmapd[pos4 * 4];
3234
3235         ty1[0] = cmapd[pos1 * 4 + 1];
3236         ty1[1] = cmapd[pos2 * 4 + 1];
3237         ty1[2] = cmapd[pos3 * 4 + 1];
3238         ty1[3] = cmapd[pos4 * 4 + 1];
3239
3240         ty2[0] = cmapd[pos1 * 4 + 2];
3241         ty2[1] = cmapd[pos2 * 4 + 2];
3242         ty2[2] = cmapd[pos3 * 4 + 2];
3243         ty2[3] = cmapd[pos4 * 4 + 2];
3244
3245         ty12[0] = cmapd[pos1 * 4 + 3];
3246         ty12[1] = cmapd[pos2 * 4 + 3];
3247         ty12[2] = cmapd[pos3 * 4 + 3];
3248         ty12[3] = cmapd[pos4 * 4 + 3];
3249
3250         /* Switch to degrees */
3251         dx    = 360.0 / cmap_grid->grid_spacing;
3252         xphi1 = xphi1 * RAD2DEG;
3253         xphi2 = xphi2 * RAD2DEG;
3254
3255         for (i = 0; i < 4; i++) /* 16 */
3256         {
3257             tx[i]      = ty[i];
3258             tx[i + 4]  = ty1[i] * dx;
3259             tx[i + 8]  = ty2[i] * dx;
3260             tx[i + 12] = ty12[i] * dx * dx;
3261         }
3262
3263         real tc[16] = { 0 };
3264         for (int idx = 0; idx < 16; idx++) /* 1056 */
3265         {
3266             for (int k = 0; k < 16; k++)
3267             {
3268                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3269             }
3270         }
3271
3272         tt = (xphi1 - iphi1 * dx) / dx;
3273         tu = (xphi2 - iphi2 * dx) / dx;
3274
3275         e   = 0;
3276         df1 = 0;
3277         df2 = 0;
3278
3279         for (i = 3; i >= 0; i--)
3280         {
3281             l1 = loop_index[i][3];
3282             l2 = loop_index[i][2];
3283             l3 = loop_index[i][1];
3284
3285             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3286             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3287             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3288         }
3289
3290         fac = RAD2DEG / dx;
3291         df1 = df1 * fac;
3292         df2 = df2 * fac;
3293
3294         /* CMAP energy */
3295         vtot += e;
3296
3297         /* Do forces - first torsion */
3298         fg1  = iprod(r1_ij, r1_kj);
3299         hg1  = iprod(r1_kl, r1_kj);
3300         fga1 = fg1 * ra2r1 * rgr1;
3301         hgb1 = hg1 * rb2r1 * rgr1;
3302         gaa1 = -ra2r1 * rg1;
3303         gbb1 = rb2r1 * rg1;
3304
3305         for (i = 0; i < DIM; i++)
3306         {
3307             dtf1[i] = gaa1 * a1[i];
3308             dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3309             dth1[i] = gbb1 * b1[i];
3310
3311             f1[i] = df1 * dtf1[i];
3312             g1[i] = df1 * dtg1[i];
3313             h1[i] = df1 * dth1[i];
3314
3315             f1_i[i] = f1[i];
3316             f1_j[i] = -f1[i] - g1[i];
3317             f1_k[i] = h1[i] + g1[i];
3318             f1_l[i] = -h1[i];
3319
3320             f[a1i][i] = f[a1i][i] + f1_i[i];
3321             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3322             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3323             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3324         }
3325
3326         /* Do forces - second torsion */
3327         fg2  = iprod(r2_ij, r2_kj);
3328         hg2  = iprod(r2_kl, r2_kj);
3329         fga2 = fg2 * ra2r2 * rgr2;
3330         hgb2 = hg2 * rb2r2 * rgr2;
3331         gaa2 = -ra2r2 * rg2;
3332         gbb2 = rb2r2 * rg2;
3333
3334         for (i = 0; i < DIM; i++)
3335         {
3336             dtf2[i] = gaa2 * a2[i];
3337             dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3338             dth2[i] = gbb2 * b2[i];
3339
3340             f2[i] = df2 * dtf2[i];
3341             g2[i] = df2 * dtg2[i];
3342             h2[i] = df2 * dth2[i];
3343
3344             f2_i[i] = f2[i];
3345             f2_j[i] = -f2[i] - g2[i];
3346             f2_k[i] = h2[i] + g2[i];
3347             f2_l[i] = -h2[i];
3348
3349             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3350             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3351             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3352             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3353         }
3354
3355         /* Shift forces */
3356         if (fshift != nullptr)
3357         {
3358             if (g)
3359             {
3360                 copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3361                 ivec_sub(SHIFT_IVEC(g, a1i), jt1, dt1_ij);
3362                 ivec_sub(SHIFT_IVEC(g, a1k), jt1, dt1_kj);
3363                 ivec_sub(SHIFT_IVEC(g, a1l), jt1, dt1_lj);
3364                 t11 = IVEC2IS(dt1_ij);
3365                 t21 = IVEC2IS(dt1_kj);
3366                 t31 = IVEC2IS(dt1_lj);
3367
3368                 copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3369                 ivec_sub(SHIFT_IVEC(g, a2i), jt2, dt2_ij);
3370                 ivec_sub(SHIFT_IVEC(g, a2k), jt2, dt2_kj);
3371                 ivec_sub(SHIFT_IVEC(g, a2l), jt2, dt2_lj);
3372                 t12 = IVEC2IS(dt2_ij);
3373                 t22 = IVEC2IS(dt2_kj);
3374                 t32 = IVEC2IS(dt2_lj);
3375             }
3376             else if (pbc)
3377             {
3378                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3379                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3380             }
3381             else
3382             {
3383                 t31 = CENTRAL;
3384                 t32 = CENTRAL;
3385             }
3386
3387             rvec_inc(fshift[t11], f1_i);
3388             rvec_inc(fshift[CENTRAL], f1_j);
3389             rvec_inc(fshift[t21], f1_k);
3390             rvec_inc(fshift[t31], f1_l);
3391
3392             rvec_inc(fshift[t12], f2_i);
3393             rvec_inc(fshift[CENTRAL], f2_j);
3394             rvec_inc(fshift[t22], f2_k);
3395             rvec_inc(fshift[t32], f2_l);
3396         }
3397     }
3398     return vtot;
3399 }
3400
3401 namespace
3402 {
3403
3404 //! \cond
3405 /***********************************************************
3406  *
3407  *   G R O M O S  9 6   F U N C T I O N S
3408  *
3409  ***********************************************************/
3410 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3411 {
3412     const real half = 0.5;
3413     real       L1, kk, x0, dx, dx2;
3414     real       v, f, dvdlambda;
3415
3416     L1 = 1.0 - lambda;
3417     kk = L1 * kA + lambda * kB;
3418     x0 = L1 * xA + lambda * xB;
3419
3420     dx  = x - x0;
3421     dx2 = dx * dx;
3422
3423     f         = -kk * dx;
3424     v         = half * kk * dx2;
3425     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3426
3427     *F = f;
3428     *V = v;
3429
3430     return dvdlambda;
3431
3432     /* That was 21 flops */
3433 }
3434
3435 template<BondedKernelFlavor flavor>
3436 real g96bonds(int             nbonds,
3437               const t_iatom   forceatoms[],
3438               const t_iparams forceparams[],
3439               const rvec      x[],
3440               rvec4           f[],
3441               rvec            fshift[],
3442               const t_pbc*    pbc,
3443               const t_graph*  g,
3444               real            lambda,
3445               real*           dvdlambda,
3446               const t_mdatoms gmx_unused* md,
3447               t_fcdata gmx_unused* fcd,
3448               int gmx_unused* global_atom_index)
3449 {
3450     int  i, ki, ai, aj, type;
3451     real dr2, fbond, vbond, vtot;
3452     rvec dx;
3453
3454     vtot = 0.0;
3455     for (i = 0; (i < nbonds);)
3456     {
3457         type = forceatoms[i++];
3458         ai   = forceatoms[i++];
3459         aj   = forceatoms[i++];
3460
3461         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3462         dr2 = iprod(dx, dx);                       /*   5               */
3463
3464         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3465                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr2,
3466                                   lambda, &vbond, &fbond);
3467
3468         vtot += 0.5 * vbond; /* 1*/
3469
3470         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3471     }                                                                  /* 44 TOTAL      */
3472     return vtot;
3473 }
3474
3475 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3476 /* Return value is the angle between the bonds i-j and j-k */
3477 {
3478     real costh;
3479
3480     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3481     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3482
3483     costh = cos_angle(r_ij, r_kj); /* 25                */
3484     /* 41 TOTAL */
3485     return costh;
3486 }
3487
3488 template<BondedKernelFlavor flavor>
3489 real g96angles(int             nbonds,
3490                const t_iatom   forceatoms[],
3491                const t_iparams forceparams[],
3492                const rvec      x[],
3493                rvec4           f[],
3494                rvec            fshift[],
3495                const t_pbc*    pbc,
3496                const t_graph*  g,
3497                real            lambda,
3498                real*           dvdlambda,
3499                const t_mdatoms gmx_unused* md,
3500                t_fcdata gmx_unused* fcd,
3501                int gmx_unused* global_atom_index)
3502 {
3503     int  i, ai, aj, ak, type, m, t1, t2;
3504     rvec r_ij, r_kj;
3505     real cos_theta, dVdt, va, vtot;
3506     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3507     rvec f_i, f_j, f_k;
3508     ivec jt, dt_ij, dt_kj;
3509
3510     vtot = 0.0;
3511     for (i = 0; (i < nbonds);)
3512     {
3513         type = forceatoms[i++];
3514         ai   = forceatoms[i++];
3515         aj   = forceatoms[i++];
3516         ak   = forceatoms[i++];
3517
3518         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3519
3520         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3521                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB,
3522                                   cos_theta, lambda, &va, &dVdt);
3523         vtot += va;
3524
3525         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3526         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3527         rij_2    = rij_1 * rij_1;
3528         rkj_2    = rkj_1 * rkj_1;
3529         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3530
3531         for (m = 0; (m < DIM); m++) /*  42      */
3532         {
3533             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3534             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3535             f_j[m] = -f_i[m] - f_k[m];
3536             f[ai][m] += f_i[m];
3537             f[aj][m] += f_j[m];
3538             f[ak][m] += f_k[m];
3539         }
3540
3541         if (computeVirial(flavor))
3542         {
3543             if (g)
3544             {
3545                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3546
3547                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3548                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3549                 t1 = IVEC2IS(dt_ij);
3550                 t2 = IVEC2IS(dt_kj);
3551             }
3552             rvec_inc(fshift[t1], f_i);
3553             rvec_inc(fshift[CENTRAL], f_j);
3554             rvec_inc(fshift[t2], f_k); /* 9 */
3555         }
3556         /* 163 TOTAL    */
3557     }
3558     return vtot;
3559 }
3560
3561 template<BondedKernelFlavor flavor>
3562 real cross_bond_bond(int             nbonds,
3563                      const t_iatom   forceatoms[],
3564                      const t_iparams forceparams[],
3565                      const rvec      x[],
3566                      rvec4           f[],
3567                      rvec            fshift[],
3568                      const t_pbc*    pbc,
3569                      const t_graph*  g,
3570                      real gmx_unused lambda,
3571                      real gmx_unused* dvdlambda,
3572                      const t_mdatoms gmx_unused* md,
3573                      t_fcdata gmx_unused* fcd,
3574                      int gmx_unused* global_atom_index)
3575 {
3576     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3577      * pp. 842-847
3578      */
3579     int  i, ai, aj, ak, type, m, t1, t2;
3580     rvec r_ij, r_kj;
3581     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3582     rvec f_i, f_j, f_k;
3583     ivec jt, dt_ij, dt_kj;
3584
3585     vtot = 0.0;
3586     for (i = 0; (i < nbonds);)
3587     {
3588         type = forceatoms[i++];
3589         ai   = forceatoms[i++];
3590         aj   = forceatoms[i++];
3591         ak   = forceatoms[i++];
3592         r1e  = forceparams[type].cross_bb.r1e;
3593         r2e  = forceparams[type].cross_bb.r2e;
3594         krr  = forceparams[type].cross_bb.krr;
3595
3596         /* Compute distance vectors ... */
3597         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3598         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3599
3600         /* ... and their lengths */
3601         r1 = norm(r_ij);
3602         r2 = norm(r_kj);
3603
3604         /* Deviations from ideality */
3605         s1 = r1 - r1e;
3606         s2 = r2 - r2e;
3607
3608         /* Energy (can be negative!) */
3609         vrr = krr * s1 * s2;
3610         vtot += vrr;
3611
3612         /* Forces */
3613         svmul(-krr * s2 / r1, r_ij, f_i);
3614         svmul(-krr * s1 / r2, r_kj, f_k);
3615
3616         for (m = 0; (m < DIM); m++) /*  12      */
3617         {
3618             f_j[m] = -f_i[m] - f_k[m];
3619             f[ai][m] += f_i[m];
3620             f[aj][m] += f_j[m];
3621             f[ak][m] += f_k[m];
3622         }
3623
3624         if (computeVirial(flavor))
3625         {
3626             if (g)
3627             {
3628                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3629
3630                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3631                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3632                 t1 = IVEC2IS(dt_ij);
3633                 t2 = IVEC2IS(dt_kj);
3634             }
3635             rvec_inc(fshift[t1], f_i);
3636             rvec_inc(fshift[CENTRAL], f_j);
3637             rvec_inc(fshift[t2], f_k); /* 9 */
3638         }
3639         /* 163 TOTAL    */
3640     }
3641     return vtot;
3642 }
3643
3644 template<BondedKernelFlavor flavor>
3645 real cross_bond_angle(int             nbonds,
3646                       const t_iatom   forceatoms[],
3647                       const t_iparams forceparams[],
3648                       const rvec      x[],
3649                       rvec4           f[],
3650                       rvec            fshift[],
3651                       const t_pbc*    pbc,
3652                       const t_graph*  g,
3653                       real gmx_unused lambda,
3654                       real gmx_unused* dvdlambda,
3655                       const t_mdatoms gmx_unused* md,
3656                       t_fcdata gmx_unused* fcd,
3657                       int gmx_unused* global_atom_index)
3658 {
3659     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3660      * pp. 842-847
3661      */
3662     int  i, ai, aj, ak, type, m, t1, t2;
3663     rvec r_ij, r_kj, r_ik;
3664     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3665     rvec f_i, f_j, f_k;
3666     ivec jt, dt_ij, dt_kj;
3667
3668     vtot = 0.0;
3669     for (i = 0; (i < nbonds);)
3670     {
3671         type = forceatoms[i++];
3672         ai   = forceatoms[i++];
3673         aj   = forceatoms[i++];
3674         ak   = forceatoms[i++];
3675         r1e  = forceparams[type].cross_ba.r1e;
3676         r2e  = forceparams[type].cross_ba.r2e;
3677         r3e  = forceparams[type].cross_ba.r3e;
3678         krt  = forceparams[type].cross_ba.krt;
3679
3680         /* Compute distance vectors ... */
3681         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3682         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3683         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3684
3685         /* ... and their lengths */
3686         r1 = norm(r_ij);
3687         r2 = norm(r_kj);
3688         r3 = norm(r_ik);
3689
3690         /* Deviations from ideality */
3691         s1 = r1 - r1e;
3692         s2 = r2 - r2e;
3693         s3 = r3 - r3e;
3694
3695         /* Energy (can be negative!) */
3696         vrt = krt * s3 * (s1 + s2);
3697         vtot += vrt;
3698
3699         /* Forces */
3700         k1 = -krt * (s3 / r1);
3701         k2 = -krt * (s3 / r2);
3702         k3 = -krt * (s1 + s2) / r3;
3703         for (m = 0; (m < DIM); m++)
3704         {
3705             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3706             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3707             f_j[m] = -f_i[m] - f_k[m];
3708         }
3709
3710         for (m = 0; (m < DIM); m++) /*  12      */
3711         {
3712             f[ai][m] += f_i[m];
3713             f[aj][m] += f_j[m];
3714             f[ak][m] += f_k[m];
3715         }
3716
3717         if (computeVirial(flavor))
3718         {
3719             if (g)
3720             {
3721                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3722
3723                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3724                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3725                 t1 = IVEC2IS(dt_ij);
3726                 t2 = IVEC2IS(dt_kj);
3727             }
3728             rvec_inc(fshift[t1], f_i);
3729             rvec_inc(fshift[CENTRAL], f_j);
3730             rvec_inc(fshift[t2], f_k); /* 9 */
3731         }
3732         /* 163 TOTAL    */
3733     }
3734     return vtot;
3735 }
3736
3737 /*! \brief Computes the potential and force for a tabulated potential */
3738 real bonded_tab(const char*          type,
3739                 int                  table_nr,
3740                 const bondedtable_t* table,
3741                 real                 kA,
3742                 real                 kB,
3743                 real                 r,
3744                 real                 lambda,
3745                 real*                V,
3746                 real*                F)
3747 {
3748     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3749     int  n0, nnn;
3750     real dvdlambda;
3751
3752     k = (1.0 - lambda) * kA + lambda * kB;
3753
3754     tabscale = table->scale;
3755     VFtab    = table->data;
3756
3757     rt = r * tabscale;
3758     n0 = static_cast<int>(rt);
3759     if (n0 >= table->n)
3760     {
3761         gmx_fatal(FARGS,
3762                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3763                   "between table indices %d and %d, table length %d",
3764                   type, table_nr, r, n0, n0 + 1, table->n);
3765     }
3766     eps   = rt - n0;
3767     eps2  = eps * eps;
3768     nnn   = 4 * n0;
3769     Yt    = VFtab[nnn];
3770     Ft    = VFtab[nnn + 1];
3771     Geps  = VFtab[nnn + 2] * eps;
3772     Heps2 = VFtab[nnn + 3] * eps2;
3773     Fp    = Ft + Geps + Heps2;
3774     VV    = Yt + Fp * eps;
3775     FF    = Fp + Geps + 2.0 * Heps2;
3776
3777     *F        = -k * FF * tabscale;
3778     *V        = k * VV;
3779     dvdlambda = (kB - kA) * VV;
3780
3781     return dvdlambda;
3782
3783     /* That was 22 flops */
3784 }
3785
3786 template<BondedKernelFlavor flavor>
3787 real tab_bonds(int             nbonds,
3788                const t_iatom   forceatoms[],
3789                const t_iparams forceparams[],
3790                const rvec      x[],
3791                rvec4           f[],
3792                rvec            fshift[],
3793                const t_pbc*    pbc,
3794                const t_graph*  g,
3795                real            lambda,
3796                real*           dvdlambda,
3797                const t_mdatoms gmx_unused* md,
3798                t_fcdata*                   fcd,
3799                int gmx_unused* global_atom_index)
3800 {
3801     int  i, ki, ai, aj, type, table;
3802     real dr, dr2, fbond, vbond, vtot;
3803     rvec dx;
3804
3805     vtot = 0.0;
3806     for (i = 0; (i < nbonds);)
3807     {
3808         type = forceatoms[i++];
3809         ai   = forceatoms[i++];
3810         aj   = forceatoms[i++];
3811
3812         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3813         dr2 = iprod(dx, dx);                       /*   5               */
3814         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3815
3816         table = forceparams[type].tab.table;
3817
3818         *dvdlambda += bonded_tab("bond", table, &fcd->bondtab[table], forceparams[type].tab.kA,
3819                                  forceparams[type].tab.kB, dr, lambda, &vbond, &fbond); /*  22 */
3820
3821         if (dr2 == 0.0)
3822         {
3823             continue;
3824         }
3825
3826
3827         vtot += vbond;              /* 1*/
3828         fbond *= gmx::invsqrt(dr2); /*   6              */
3829
3830         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3831     }                                                                  /* 62 TOTAL      */
3832     return vtot;
3833 }
3834
3835 template<BondedKernelFlavor flavor>
3836 real tab_angles(int             nbonds,
3837                 const t_iatom   forceatoms[],
3838                 const t_iparams forceparams[],
3839                 const rvec      x[],
3840                 rvec4           f[],
3841                 rvec            fshift[],
3842                 const t_pbc*    pbc,
3843                 const t_graph*  g,
3844                 real            lambda,
3845                 real*           dvdlambda,
3846                 const t_mdatoms gmx_unused* md,
3847                 t_fcdata*                   fcd,
3848                 int gmx_unused* global_atom_index)
3849 {
3850     int  i, ai, aj, ak, t1, t2, type, table;
3851     rvec r_ij, r_kj;
3852     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3853     ivec jt, dt_ij, dt_kj;
3854
3855     vtot = 0.0;
3856     for (i = 0; (i < nbonds);)
3857     {
3858         type = forceatoms[i++];
3859         ai   = forceatoms[i++];
3860         aj   = forceatoms[i++];
3861         ak   = forceatoms[i++];
3862
3863         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3864
3865         table = forceparams[type].tab.table;
3866
3867         *dvdlambda += bonded_tab("angle", table, &fcd->angletab[table], forceparams[type].tab.kA,
3868                                  forceparams[type].tab.kB, theta, lambda, &va, &dVdt); /*  22  */
3869         vtot += va;
3870
3871         cos_theta2 = gmx::square(cos_theta); /*   1             */
3872         if (cos_theta2 < 1)
3873         {
3874             int  m;
3875             real st, sth;
3876             real cik, cii, ckk;
3877             real nrkj2, nrij2;
3878             rvec f_i, f_j, f_k;
3879
3880             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3881             sth   = st * cos_theta;                      /*   1         */
3882             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3883             nrij2 = iprod(r_ij, r_ij);
3884
3885             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3886             cii = sth / nrij2;                      /*  10              */
3887             ckk = sth / nrkj2;                      /*  10              */
3888
3889             for (m = 0; (m < DIM); m++) /*  39          */
3890             {
3891                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3892                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3893                 f_j[m] = -f_i[m] - f_k[m];
3894                 f[ai][m] += f_i[m];
3895                 f[aj][m] += f_j[m];
3896                 f[ak][m] += f_k[m];
3897             }
3898
3899             if (computeVirial(flavor))
3900             {
3901                 if (g)
3902                 {
3903                     copy_ivec(SHIFT_IVEC(g, aj), jt);
3904
3905                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3906                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3907                     t1 = IVEC2IS(dt_ij);
3908                     t2 = IVEC2IS(dt_kj);
3909                 }
3910                 rvec_inc(fshift[t1], f_i);
3911                 rvec_inc(fshift[CENTRAL], f_j);
3912                 rvec_inc(fshift[t2], f_k);
3913             }
3914         } /* 169 TOTAL  */
3915     }
3916     return vtot;
3917 }
3918
3919 template<BondedKernelFlavor flavor>
3920 real tab_dihs(int             nbonds,
3921               const t_iatom   forceatoms[],
3922               const t_iparams forceparams[],
3923               const rvec      x[],
3924               rvec4           f[],
3925               rvec            fshift[],
3926               const t_pbc*    pbc,
3927               const t_graph*  g,
3928               real            lambda,
3929               real*           dvdlambda,
3930               const t_mdatoms gmx_unused* md,
3931               t_fcdata*                   fcd,
3932               int gmx_unused* global_atom_index)
3933 {
3934     int  i, type, ai, aj, ak, al, table;
3935     int  t1, t2, t3;
3936     rvec r_ij, r_kj, r_kl, m, n;
3937     real phi, ddphi, vpd, vtot;
3938
3939     vtot = 0.0;
3940     for (i = 0; (i < nbonds);)
3941     {
3942         type = forceatoms[i++];
3943         ai   = forceatoms[i++];
3944         aj   = forceatoms[i++];
3945         ak   = forceatoms[i++];
3946         al   = forceatoms[i++];
3947
3948         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
3949
3950         table = forceparams[type].tab.table;
3951
3952         /* Hopefully phi+M_PI never results in values < 0 */
3953         *dvdlambda += bonded_tab("dihedral", table, &fcd->dihtab[table], forceparams[type].tab.kA,
3954                                  forceparams[type].tab.kB, phi + M_PI, lambda, &vpd, &ddphi);
3955
3956         vtot += vpd;
3957         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
3958                            t2, t3); /* 112      */
3959
3960     } /* 227 TOTAL  */
3961
3962     return vtot;
3963 }
3964
3965 struct BondedInteractions
3966 {
3967     BondedFunction function;
3968     int            nrnbIndex;
3969 };
3970
3971 /*! \brief Lookup table of bonded interaction functions
3972  *
3973  * This must have as many entries as interaction_function in ifunc.cpp */
3974 template<BondedKernelFlavor flavor>
3975 const std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
3976     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
3977     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
3978     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
3979     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
3980     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
3981     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
3982     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
3983     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
3984     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
3985     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
3986     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
3987     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
3988     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
3989     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
3990     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3991     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3992     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
3993     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
3994     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
3995     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
3996     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
3997     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
3998     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
3999     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
4000     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
4001     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
4002     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
4003     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
4004     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
4005     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
4006     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
4007     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
4008     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
4009     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
4010     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
4011     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
4012     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
4013     BondedInteractions{ unimplemented, -1 },                       // F_LJ
4014     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
4015     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
4016     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
4017     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
4018     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
4019     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
4020     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
4021     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
4022     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
4023     BondedInteractions{ unimplemented, -1 },                       // F_DPD
4024     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
4025     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
4026     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
4027     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
4028     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
4029     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
4030     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
4031     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
4032     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
4033     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
4034     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
4035     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
4036     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
4037     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
4038     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
4039     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
4040     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
4041     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
4042     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
4043     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
4044     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
4045     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
4046     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
4047     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
4048     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
4049     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
4050     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
4051     BondedInteractions{ unimplemented, -1 },                       // F_EQM
4052     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
4053     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
4054     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
4055     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
4056     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
4057     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
4058     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
4059     BondedInteractions{ unimplemented, -1 },                       // F_PRES
4060     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
4061     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
4062     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
4063     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
4064     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
4065     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
4066     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
4067     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
4068 };
4069
4070 /*! \brief List of instantiated BondedInteractions list */
4071 const gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
4072     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
4073     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
4074     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
4075     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
4076 };
4077
4078 //! \endcond
4079
4080 } // namespace
4081
4082 real calculateSimpleBond(const int            ftype,
4083                          const int            numForceatoms,
4084                          const t_iatom        forceatoms[],
4085                          const t_iparams      forceparams[],
4086                          const rvec           x[],
4087                          rvec4                f[],
4088                          rvec                 fshift[],
4089                          const struct t_pbc*  pbc,
4090                          const struct t_graph gmx_unused* g,
4091                          const real                       lambda,
4092                          real*                            dvdlambda,
4093                          const t_mdatoms*                 md,
4094                          t_fcdata*                        fcd,
4095                          int gmx_unused*          global_atom_index,
4096                          const BondedKernelFlavor bondedKernelFlavor)
4097 {
4098     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
4099
4100     real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
4101                              dvdlambda, md, fcd, global_atom_index);
4102
4103     return v;
4104 }
4105
4106 int nrnbIndex(int ftype)
4107 {
4108     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
4109 }