src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines low-level functions necessary for
  40  * computing energies and forces for listed interactions.
  41  *
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  *
  44  * \ingroup module_listed_forces
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "bonded.h"
  49
  50 #include "config.h"
  51
  52 #include <cassert>
  53 #include <cmath>
  54
  55 #include <algorithm>
  56
  57 #include "gromacs/gmxlib/nrnb.h"
  58 #include "gromacs/listed_forces/disre.h"
  59 #include "gromacs/listed_forces/orires.h"
  60 #include "gromacs/math/functions.h"
  61 #include "gromacs/math/units.h"
  62 #include "gromacs/math/utilities.h"
  63 #include "gromacs/math/vec.h"
  64 #include "gromacs/mdtypes/fcdata.h"
  65 #include "gromacs/mdtypes/mdatom.h"
  66 #include "gromacs/pbcutil/ishift.h"
  67 #include "gromacs/pbcutil/mshift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 namespace
  85 {
  86
  87 //! Type of CPU function to compute a bonded interaction.
  88 using BondedFunction = real(*)(int nbonds, const t_iatom iatoms[],
  89                                const t_iparams iparams[],
  90                                const rvec x[], rvec4 f[], rvec fshift[],
  91                                const t_pbc *pbc, const t_graph *g,
  92                                real lambda, real *dvdlambda,
  93                                const t_mdatoms *md, t_fcdata *fcd,
  94                                int *ddgatindex);
  95
  96 /*! \brief Mysterious CMAP coefficient matrix */
  97 const int cmap_coeff_matrix[] = {
  98     1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
  99     0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
 100     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
 101     0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
 102     0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
 103     0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
 104     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
 105     0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
 106     0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
 107     0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
 108     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
 109     0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
 110     0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
 111     0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
 112     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
 113     0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
 114 };
 115
 116
 117 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 118  *
 119  * \todo This kind of code appears in many places. Consolidate it */
 120 int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 121 {
 122     if (pbc)
 123     {
 124         return pbc_dx_aiuc(pbc, xi, xj, dx);
 125     }
 126     else
 127     {
 128         rvec_sub(xi, xj, dx);
 129         return CENTRAL;
 130     }
 131 }
 132
 133 } // namespace
 134
 135 //! \cond
 136 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 137                 rvec r_ij, rvec r_kj, real *costh,
 138                 int *t1, int *t2)
 139 /* Return value is the angle between the bonds i-j and j-k */
 140 {
 141     /* 41 FLOPS */
 142     real th;
 143
 144     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 145     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 146
 147     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 148     th     = std::acos(*costh);            /* 10                */
 149     /* 41 TOTAL */
 150     return th;
 151 }
 152
 153 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
 154                const t_pbc *pbc,
 155                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
 156                int *t1, int *t2, int *t3)
 157 {
 158     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 159     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 160     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 161
 162     cprod(r_ij, r_kj, m);                  /*  9        */
 163     cprod(r_kj, r_kl, n);                  /*  9                */
 164     real phi  = gmx_angle(m, n);           /* 49 (assuming 25 for atan2) */
 165     real ipr  = iprod(r_ij, n);            /*  5        */
 166     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 167     phi       = sign*phi;                  /*  1                */
 168     /* 82 TOTAL */
 169     return phi;
 170 }
 171 //! \endcond
 172
 173 void make_dp_periodic(real *dp)  /* 1 flop? */
 174 {
 175     /* dp cannot be outside (-pi,pi) */
 176     if (*dp >= M_PI)
 177     {
 178         *dp -= 2*M_PI;
 179     }
 180     else if (*dp < -M_PI)
 181     {
 182         *dp += 2*M_PI;
 183     }
 184 }
 185
 186 namespace
 187 {
 188
 189 /*! \brief Morse potential bond
 190  *
 191  * By Frank Everdij. Three parameters needed:
 192  *
 193  * b0 = equilibrium distance in nm
 194  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 195  * cb = well depth in kJ/mol
 196  *
 197  * Note: the potential is referenced to be +cb at infinite separation
 198  *       and zero at the equilibrium distance!
 199  */
 200 real morse_bonds(int nbonds,
 201                  const t_iatom forceatoms[], const t_iparams forceparams[],
 202                  const rvec x[], rvec4 f[], rvec fshift[],
 203                  const t_pbc *pbc, const t_graph *g,
 204                  real lambda, real *dvdlambda,
 205                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 206                  int gmx_unused *global_atom_index)
 207 {
 208     const real one = 1.0;
 209     const real two = 2.0;
 210     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
 211     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 212     rvec       dx;
 213     int        i, m, ki, type, ai, aj;
 214     ivec       dt;
 215
 216     vtot = 0.0;
 217     for (i = 0; (i < nbonds); )
 218     {
 219         type = forceatoms[i++];
 220         ai   = forceatoms[i++];
 221         aj   = forceatoms[i++];
 222
 223         b0A   = forceparams[type].morse.b0A;
 224         beA   = forceparams[type].morse.betaA;
 225         cbA   = forceparams[type].morse.cbA;
 226
 227         b0B   = forceparams[type].morse.b0B;
 228         beB   = forceparams[type].morse.betaB;
 229         cbB   = forceparams[type].morse.cbB;
 230
 231         L1 = one-lambda;                            /* 1 */
 232         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 233         be = L1*beA + lambda*beB;                   /* 3 */
 234         cb = L1*cbA + lambda*cbB;                   /* 3 */
 235
 236         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 237         dr2  = iprod(dx, dx);                       /*   5          */
 238         dr   = dr2*gmx::invsqrt(dr2);               /*  10          */
 239         temp = std::exp(-be*(dr-b0));               /*  12          */
 240
 241         if (temp == one)
 242         {
 243             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 244             *dvdlambda += cbB-cbA;
 245             continue;
 246         }
 247
 248         omtemp    = one-temp;                                                                                        /*   1          */
 249         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 250         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 251         fbond     = -two*be*temp*cbomtemp*gmx::invsqrt(dr2);                                                         /*   9          */
 252         vtot     += vbond;                                                                                           /*   1          */
 253
 254         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 255
 256         if (g)
 257         {
 258             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 259             ki = IVEC2IS(dt);
 260         }
 261
 262         for (m = 0; (m < DIM); m++)                    /*  15          */
 263         {
 264             fij                 = fbond*dx[m];
 265             f[ai][m]           += fij;
 266             f[aj][m]           -= fij;
 267             fshift[ki][m]      += fij;
 268             fshift[CENTRAL][m] -= fij;
 269         }
 270     }                                         /*  83 TOTAL    */
 271     return vtot;
 272 }
 273
 274 //! \cond
 275 real cubic_bonds(int nbonds,
 276                  const t_iatom forceatoms[], const t_iparams forceparams[],
 277                  const rvec x[], rvec4 f[], rvec fshift[],
 278                  const t_pbc *pbc, const t_graph *g,
 279                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 280                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 281                  int gmx_unused *global_atom_index)
 282 {
 283     const real three = 3.0;
 284     const real two   = 2.0;
 285     real       kb, b0, kcub;
 286     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
 287     rvec       dx;
 288     int        i, m, ki, type, ai, aj;
 289     ivec       dt;
 290
 291     vtot = 0.0;
 292     for (i = 0; (i < nbonds); )
 293     {
 294         type = forceatoms[i++];
 295         ai   = forceatoms[i++];
 296         aj   = forceatoms[i++];
 297
 298         b0   = forceparams[type].cubic.b0;
 299         kb   = forceparams[type].cubic.kb;
 300         kcub = forceparams[type].cubic.kcub;
 301
 302         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 303         dr2  = iprod(dx, dx);                           /*   5          */
 304
 305         if (dr2 == 0.0)
 306         {
 307             continue;
 308         }
 309
 310         dr         = dr2*gmx::invsqrt(dr2);                  /*  10          */
 311         dist       = dr-b0;
 312         kdist      = kb*dist;
 313         kdist2     = kdist*dist;
 314
 315         vbond      = kdist2 + kcub*kdist2*dist;
 316         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 317
 318         vtot      += vbond;   /* 21 */
 319
 320         if (g)
 321         {
 322             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 323             ki = IVEC2IS(dt);
 324         }
 325         for (m = 0; (m < DIM); m++)                    /*  15          */
 326         {
 327             fij                 = fbond*dx[m];
 328             f[ai][m]           += fij;
 329             f[aj][m]           -= fij;
 330             fshift[ki][m]      += fij;
 331             fshift[CENTRAL][m] -= fij;
 332         }
 333     }                                         /*  54 TOTAL    */
 334     return vtot;
 335 }
 336
 337 real FENE_bonds(int nbonds,
 338                 const t_iatom forceatoms[], const t_iparams forceparams[],
 339                 const rvec x[], rvec4 f[], rvec fshift[],
 340                 const t_pbc *pbc, const t_graph *g,
 341                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 342                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 343                 int *global_atom_index)
 344 {
 345     const real half = 0.5;
 346     const real one  = 1.0;
 347     real       bm, kb;
 348     real       dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
 349     rvec       dx;
 350     int        i, m, ki, type, ai, aj;
 351     ivec       dt;
 352
 353     vtot = 0.0;
 354     for (i = 0; (i < nbonds); )
 355     {
 356         type = forceatoms[i++];
 357         ai   = forceatoms[i++];
 358         aj   = forceatoms[i++];
 359
 360         bm   = forceparams[type].fene.bm;
 361         kb   = forceparams[type].fene.kb;
 362
 363         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 364         dr2  = iprod(dx, dx);                           /*   5          */
 365
 366         if (dr2 == 0.0)
 367         {
 368             continue;
 369         }
 370
 371         bm2 = bm*bm;
 372
 373         if (dr2 >= bm2)
 374         {
 375             gmx_fatal(FARGS,
 376                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 377                       dr2, bm2,
 378                       glatnr(global_atom_index, ai),
 379                       glatnr(global_atom_index, aj));
 380         }
 381
 382         omdr2obm2  = one - dr2/bm2;
 383
 384         vbond      = -half*kb*bm2*std::log(omdr2obm2);
 385         fbond      = -kb/omdr2obm2;
 386
 387         vtot      += vbond;   /* 35 */
 388
 389         if (g)
 390         {
 391             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 392             ki = IVEC2IS(dt);
 393         }
 394         for (m = 0; (m < DIM); m++)                    /*  15          */
 395         {
 396             fij                 = fbond*dx[m];
 397             f[ai][m]           += fij;
 398             f[aj][m]           -= fij;
 399             fshift[ki][m]      += fij;
 400             fshift[CENTRAL][m] -= fij;
 401         }
 402     }                                         /*  58 TOTAL    */
 403     return vtot;
 404 }
 405
 406 /*! \brief Computes the potential and force for a harmonic potential with free-energy perturbation */
 407 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 408               real *V, real *F)
 409 {
 410     const real half = 0.5;
 411     real       L1, kk, x0, dx, dx2;
 412     real       v, f, dvdlambda;
 413
 414     L1    = 1.0-lambda;
 415     kk    = L1*kA+lambda*kB;
 416     x0    = L1*xA+lambda*xB;
 417
 418     dx    = x-x0;
 419     dx2   = dx*dx;
 420
 421     f          = -kk*dx;
 422     v          = half*kk*dx2;
 423     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 424
 425     *F    = f;
 426     *V    = v;
 427
 428     return dvdlambda;
 429
 430     /* That was 19 flops */
 431 }
 432
 433
 434 real bonds(int nbonds,
 435            const t_iatom forceatoms[], const t_iparams forceparams[],
 436            const rvec x[], rvec4 f[], rvec fshift[],
 437            const t_pbc *pbc, const t_graph *g,
 438            real lambda, real *dvdlambda,
 439            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 440            int gmx_unused *global_atom_index)
 441 {
 442     int  i, m, ki, ai, aj, type;
 443     real dr, dr2, fbond, vbond, fij, vtot;
 444     rvec dx;
 445     ivec dt;
 446
 447     vtot = 0.0;
 448     for (i = 0; (i < nbonds); )
 449     {
 450         type = forceatoms[i++];
 451         ai   = forceatoms[i++];
 452         aj   = forceatoms[i++];
 453
 454         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 455         dr2  = iprod(dx, dx);                       /*   5              */
 456         dr   = std::sqrt(dr2);                      /*  10              */
 457
 458         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 459                                forceparams[type].harmonic.krB,
 460                                forceparams[type].harmonic.rA,
 461                                forceparams[type].harmonic.rB,
 462                                dr, lambda, &vbond, &fbond); /*  19  */
 463
 464         if (dr2 == 0.0)
 465         {
 466             continue;
 467         }
 468
 469
 470         vtot  += vbond;             /* 1*/
 471         fbond *= gmx::invsqrt(dr2); /*   6              */
 472         if (g)
 473         {
 474             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 475             ki = IVEC2IS(dt);
 476         }
 477         for (m = 0; (m < DIM); m++)     /*  15          */
 478         {
 479             fij                 = fbond*dx[m];
 480             f[ai][m]           += fij;
 481             f[aj][m]           -= fij;
 482             fshift[ki][m]      += fij;
 483             fshift[CENTRAL][m] -= fij;
 484         }
 485     }               /* 59 TOTAL */
 486     return vtot;
 487 }
 488
 489 real restraint_bonds(int nbonds,
 490                      const t_iatom forceatoms[], const t_iparams forceparams[],
 491                      const rvec x[], rvec4 f[], rvec fshift[],
 492                      const t_pbc *pbc, const t_graph *g,
 493                      real lambda, real *dvdlambda,
 494                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 495                      int gmx_unused *global_atom_index)
 496 {
 497     int  i, m, ki, ai, aj, type;
 498     real dr, dr2, fbond, vbond, fij, vtot;
 499     real L1;
 500     real low, dlow, up1, dup1, up2, dup2, k, dk;
 501     real drh, drh2;
 502     rvec dx;
 503     ivec dt;
 504
 505     L1   = 1.0 - lambda;
 506
 507     vtot = 0.0;
 508     for (i = 0; (i < nbonds); )
 509     {
 510         type = forceatoms[i++];
 511         ai   = forceatoms[i++];
 512         aj   = forceatoms[i++];
 513
 514         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 515         dr2  = iprod(dx, dx);                       /*   5              */
 516         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
 517
 518         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 519         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 520         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 521         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 522         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 523         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 524         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 525         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 526         /* 24 */
 527
 528         if (dr < low)
 529         {
 530             drh         = dr - low;
 531             drh2        = drh*drh;
 532             vbond       = 0.5*k*drh2;
 533             fbond       = -k*drh;
 534             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 535         }   /* 11 */
 536         else if (dr <= up1)
 537         {
 538             vbond = 0;
 539             fbond = 0;
 540         }
 541         else if (dr <= up2)
 542         {
 543             drh         = dr - up1;
 544             drh2        = drh*drh;
 545             vbond       = 0.5*k*drh2;
 546             fbond       = -k*drh;
 547             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 548         }   /* 11       */
 549         else
 550         {
 551             drh         = dr - up2;
 552             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 553             fbond       = -k*(up2 - up1);
 554             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 555                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 556                 - k*(up2 - up1)*dup2;
 557         }
 558
 559         if (dr2 == 0.0)
 560         {
 561             continue;
 562         }
 563
 564         vtot  += vbond;             /* 1*/
 565         fbond *= gmx::invsqrt(dr2); /*   6              */
 566         if (g)
 567         {
 568             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 569             ki = IVEC2IS(dt);
 570         }
 571         for (m = 0; (m < DIM); m++)             /*  15          */
 572         {
 573             fij                 = fbond*dx[m];
 574             f[ai][m]           += fij;
 575             f[aj][m]           -= fij;
 576             fshift[ki][m]      += fij;
 577             fshift[CENTRAL][m] -= fij;
 578         }
 579     }                   /* 59 TOTAL     */
 580
 581     return vtot;
 582 }
 583
 584 real polarize(int nbonds,
 585               const t_iatom forceatoms[], const t_iparams forceparams[],
 586               const rvec x[], rvec4 f[], rvec fshift[],
 587               const t_pbc *pbc, const t_graph *g,
 588               real lambda, real *dvdlambda,
 589               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 590               int gmx_unused *global_atom_index)
 591 {
 592     int  i, m, ki, ai, aj, type;
 593     real dr, dr2, fbond, vbond, fij, vtot, ksh;
 594     rvec dx;
 595     ivec dt;
 596
 597     vtot = 0.0;
 598     for (i = 0; (i < nbonds); )
 599     {
 600         type = forceatoms[i++];
 601         ai   = forceatoms[i++];
 602         aj   = forceatoms[i++];
 603         ksh  = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 604
 605         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 606         dr2  = iprod(dx, dx);                                               /*   5              */
 607         dr   = std::sqrt(dr2);                                              /*  10              */
 608
 609         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 610
 611         if (dr2 == 0.0)
 612         {
 613             continue;
 614         }
 615
 616         vtot  += vbond;             /* 1*/
 617         fbond *= gmx::invsqrt(dr2); /*   6              */
 618
 619         if (g)
 620         {
 621             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 622             ki = IVEC2IS(dt);
 623         }
 624         for (m = 0; (m < DIM); m++)     /*  15          */
 625         {
 626             fij                 = fbond*dx[m];
 627             f[ai][m]           += fij;
 628             f[aj][m]           -= fij;
 629             fshift[ki][m]      += fij;
 630             fshift[CENTRAL][m] -= fij;
 631         }
 632     }               /* 59 TOTAL */
 633     return vtot;
 634 }
 635
 636 real anharm_polarize(int nbonds,
 637                      const t_iatom forceatoms[], const t_iparams forceparams[],
 638                      const rvec x[], rvec4 f[], rvec fshift[],
 639                      const t_pbc *pbc, const t_graph *g,
 640                      real lambda, real *dvdlambda,
 641                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 642                      int gmx_unused *global_atom_index)
 643 {
 644     int  i, m, ki, ai, aj, type;
 645     real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
 646     rvec dx;
 647     ivec dt;
 648
 649     vtot = 0.0;
 650     for (i = 0; (i < nbonds); )
 651     {
 652         type  = forceatoms[i++];
 653         ai    = forceatoms[i++];
 654         aj    = forceatoms[i++];
 655         ksh   = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 656         khyp  = forceparams[type].anharm_polarize.khyp;
 657         drcut = forceparams[type].anharm_polarize.drcut;
 658
 659         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 660         dr2  = iprod(dx, dx);                                               /*   5              */
 661         dr   = dr2*gmx::invsqrt(dr2);                                       /*  10              */
 662
 663         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 664
 665         if (dr2 == 0.0)
 666         {
 667             continue;
 668         }
 669
 670         if (dr > drcut)
 671         {
 672             ddr    = dr-drcut;
 673             ddr3   = ddr*ddr*ddr;
 674             vbond += khyp*ddr*ddr3;
 675             fbond -= 4*khyp*ddr3;
 676         }
 677         fbond *= gmx::invsqrt(dr2); /*   6              */
 678         vtot  += vbond;             /* 1*/
 679
 680         if (g)
 681         {
 682             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 683             ki = IVEC2IS(dt);
 684         }
 685         for (m = 0; (m < DIM); m++)     /*  15          */
 686         {
 687             fij                 = fbond*dx[m];
 688             f[ai][m]           += fij;
 689             f[aj][m]           -= fij;
 690             fshift[ki][m]      += fij;
 691             fshift[CENTRAL][m] -= fij;
 692         }
 693     }               /* 72 TOTAL */
 694     return vtot;
 695 }
 696
 697 real water_pol(int nbonds,
 698                const t_iatom forceatoms[], const t_iparams forceparams[],
 699                const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
 700                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 701                real gmx_unused lambda, real gmx_unused *dvdlambda,
 702                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 703                int gmx_unused *global_atom_index)
 704 {
 705     /* This routine implements anisotropic polarizibility for water, through
 706      * a shell connected to a dummy with spring constant that differ in the
 707      * three spatial dimensions in the molecular frame.
 708      */
 709     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 710     ivec dt;
 711     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 712     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 713
 714     vtot = 0.0;
 715     if (nbonds > 0)
 716     {
 717         type0  = forceatoms[0];
 718         aS     = forceatoms[5];
 719         qS     = md->chargeA[aS];
 720         kk[XX] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 721         kk[YY] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 722         kk[ZZ] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 723         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 724         for (i = 0; (i < nbonds); i += 6)
 725         {
 726             type = forceatoms[i];
 727             if (type != type0)
 728             {
 729                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 730                           type, type0, __FILE__, __LINE__);
 731             }
 732             aO   = forceatoms[i+1];
 733             aH1  = forceatoms[i+2];
 734             aH2  = forceatoms[i+3];
 735             aD   = forceatoms[i+4];
 736             aS   = forceatoms[i+5];
 737
 738             /* Compute vectors describing the water frame */
 739             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 740             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 741             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 742             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 743             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 744             cprod(dOH1, dOH2, nW);
 745
 746             /* Compute inverse length of normal vector
 747              * (this one could be precomputed, but I'm too lazy now)
 748              */
 749             r_nW = gmx::invsqrt(iprod(nW, nW));
 750             /* This is for precision, but does not make a big difference,
 751              * it can go later.
 752              */
 753             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 754
 755             /* Normalize the vectors in the water frame */
 756             svmul(r_nW, nW, nW);
 757             svmul(r_HH, dHH, dHH);
 758             svmul(r_OD, dOD, dOD);
 759
 760             /* Compute displacement of shell along components of the vector */
 761             dx[ZZ] = iprod(dDS, dOD);
 762             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 763             for (m = 0; (m < DIM); m++)
 764             {
 765                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 766             }
 767
 768             /*dx[XX] = iprod(dDS,nW);
 769                dx[YY] = iprod(dDS,dHH);*/
 770             dx[XX] = iprod(proj, nW);
 771             for (m = 0; (m < DIM); m++)
 772             {
 773                 proj[m] -= dx[XX]*nW[m];
 774             }
 775             dx[YY] = iprod(proj, dHH);
 776             /* Now compute the forces and energy */
 777             kdx[XX] = kk[XX]*dx[XX];
 778             kdx[YY] = kk[YY]*dx[YY];
 779             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 780             vtot   += iprod(dx, kdx);
 781
 782             if (g)
 783             {
 784                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 785                 ki = IVEC2IS(dt);
 786             }
 787
 788             for (m = 0; (m < DIM); m++)
 789             {
 790                 /* This is a tensor operation but written out for speed */
 791                 tx                  =  nW[m]*kdx[XX];
 792                 ty                  = dHH[m]*kdx[YY];
 793                 tz                  = dOD[m]*kdx[ZZ];
 794                 fij                 = -tx-ty-tz;
 795                 f[aS][m]           += fij;
 796                 f[aD][m]           -= fij;
 797                 fshift[ki][m]      += fij;
 798                 fshift[CENTRAL][m] -= fij;
 799             }
 800         }
 801     }
 802     return 0.5*vtot;
 803 }
 804
 805 real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 806                 const t_pbc *pbc, real qq,
 807                 rvec fshift[], real afac)
 808 {
 809     rvec r12;
 810     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 811     int  m, t;
 812
 813     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 814
 815     r12sq  = iprod(r12, r12);                                     /*  5 */
 816     r12_1  = gmx::invsqrt(r12sq);                                 /*  5 */
 817     r12bar = afac/r12_1;                                          /*  5 */
 818     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 819     ebar   = std::exp(-r12bar);                                   /*  5 */
 820     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 821     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 822
 823     for (m = 0; (m < DIM); m++)
 824     {
 825         fff                 = fscal*r12[m];
 826         fi[m]              += fff;
 827         fj[m]              -= fff;
 828         fshift[t][m]       += fff;
 829         fshift[CENTRAL][m] -= fff;
 830     }             /* 15 */
 831
 832     return v0*v1; /* 1 */
 833     /* 54 */
 834 }
 835
 836 real thole_pol(int nbonds,
 837                const t_iatom forceatoms[], const t_iparams forceparams[],
 838                const rvec x[], rvec4 f[], rvec fshift[],
 839                const t_pbc *pbc, const t_graph gmx_unused *g,
 840                real gmx_unused lambda, real gmx_unused *dvdlambda,
 841                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 842                int gmx_unused *global_atom_index)
 843 {
 844     /* Interaction between two pairs of particles with opposite charge */
 845     int        i, type, a1, da1, a2, da2;
 846     real       q1, q2, qq, a, al1, al2, afac;
 847     real       V             = 0;
 848
 849     for (i = 0; (i < nbonds); )
 850     {
 851         type  = forceatoms[i++];
 852         a1    = forceatoms[i++];
 853         da1   = forceatoms[i++];
 854         a2    = forceatoms[i++];
 855         da2   = forceatoms[i++];
 856         q1    = md->chargeA[da1];
 857         q2    = md->chargeA[da2];
 858         a     = forceparams[type].thole.a;
 859         al1   = forceparams[type].thole.alpha1;
 860         al2   = forceparams[type].thole.alpha2;
 861         qq    = q1*q2;
 862         afac  = a*gmx::invsixthroot(al1*al2);
 863         V    += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 864         V    += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 865         V    += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 866         V    += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 867     }
 868     /* 290 flops */
 869     return V;
 870 }
 871
 872 template <BondedKernelFlavor flavor>
 873 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 874 angles(int nbonds,
 875        const t_iatom forceatoms[], const t_iparams forceparams[],
 876        const rvec x[], rvec4 f[], rvec fshift[],
 877        const t_pbc *pbc, const t_graph *g,
 878        real lambda, real *dvdlambda,
 879        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 880        int gmx_unused *global_atom_index)
 881 {
 882     int  i, ai, aj, ak, t1, t2, type;
 883     rvec r_ij, r_kj;
 884     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 885     ivec jt, dt_ij, dt_kj;
 886
 887     vtot = 0.0;
 888     for (i = 0; i < nbonds; )
 889     {
 890         type = forceatoms[i++];
 891         ai   = forceatoms[i++];
 892         aj   = forceatoms[i++];
 893         ak   = forceatoms[i++];
 894
 895         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 896                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 897
 898         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 899                                forceparams[type].harmonic.krB,
 900                                forceparams[type].harmonic.rA*DEG2RAD,
 901                                forceparams[type].harmonic.rB*DEG2RAD,
 902                                theta, lambda, &va, &dVdt);  /*  21  */
 903         vtot += va;
 904
 905         cos_theta2 = gmx::square(cos_theta);
 906         if (cos_theta2 < 1)
 907         {
 908             int  m;
 909             real st, sth;
 910             real cik, cii, ckk;
 911             real nrkj2, nrij2;
 912             real nrkj_1, nrij_1;
 913             rvec f_i, f_j, f_k;
 914
 915             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
 916             sth   = st*cos_theta;                      /*   1           */
 917             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
 918             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
 919
 920             nrij_1 = gmx::invsqrt(nrij2);              /*  10           */
 921             nrkj_1 = gmx::invsqrt(nrkj2);              /*  10           */
 922
 923             cik = st*nrij_1*nrkj_1;                    /*   2           */
 924             cii = sth*nrij_1*nrij_1;                   /*   2           */
 925             ckk = sth*nrkj_1*nrkj_1;                   /*   2           */
 926
 927             for (m = 0; m < DIM; m++)
 928             {           /*  39          */
 929                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 930                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 931                 f_j[m]    = -f_i[m] - f_k[m];
 932                 f[ai][m] += f_i[m];
 933                 f[aj][m] += f_j[m];
 934                 f[ak][m] += f_k[m];
 935             }
 936             if (g != nullptr)
 937             {
 938                 copy_ivec(SHIFT_IVEC(g, aj), jt);
 939
 940                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
 941                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
 942                 t1 = IVEC2IS(dt_ij);
 943                 t2 = IVEC2IS(dt_kj);
 944             }
 945             rvec_inc(fshift[t1], f_i);
 946             rvec_inc(fshift[CENTRAL], f_j);
 947             rvec_inc(fshift[t2], f_k);
 948         }                                           /* 161 TOTAL        */
 949     }
 950
 951     return vtot;
 952 }
 953
 954 #if GMX_SIMD_HAVE_REAL
 955
 956 /* As angles, but using SIMD to calculate many angles at once.
 957  * This routines does not calculate energies and shift forces.
 958  */
 959 template <BondedKernelFlavor flavor>
 960 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 961 angles(int nbonds,
 962        const t_iatom forceatoms[], const t_iparams forceparams[],
 963        const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
 964        const t_pbc *pbc, const t_graph gmx_unused *g,
 965        real gmx_unused lambda, real gmx_unused *dvdlambda,
 966        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 967        int gmx_unused *global_atom_index)
 968 {
 969     const int            nfa1 = 4;
 970     int                  i, iu, s;
 971     int                  type;
 972     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
 973     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
 974     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
 975     alignas(GMX_SIMD_ALIGNMENT) real            coeff[2*GMX_SIMD_REAL_WIDTH];
 976     SimdReal             deg2rad_S(DEG2RAD);
 977     SimdReal             xi_S, yi_S, zi_S;
 978     SimdReal             xj_S, yj_S, zj_S;
 979     SimdReal             xk_S, yk_S, zk_S;
 980     SimdReal             k_S, theta0_S;
 981     SimdReal             rijx_S, rijy_S, rijz_S;
 982     SimdReal             rkjx_S, rkjy_S, rkjz_S;
 983     SimdReal             one_S(1.0);
 984     SimdReal             min_one_plus_eps_S(-1.0 + 2.0*GMX_REAL_EPS); // Smallest number > -1
 985
 986     SimdReal             rij_rkj_S;
 987     SimdReal             nrij2_S, nrij_1_S;
 988     SimdReal             nrkj2_S, nrkj_1_S;
 989     SimdReal             cos_S, invsin_S;
 990     SimdReal             theta_S;
 991     SimdReal             st_S, sth_S;
 992     SimdReal             cik_S, cii_S, ckk_S;
 993     SimdReal             f_ix_S, f_iy_S, f_iz_S;
 994     SimdReal             f_kx_S, f_ky_S, f_kz_S;
 995     alignas(GMX_SIMD_ALIGNMENT) real    pbc_simd[9*GMX_SIMD_REAL_WIDTH];
 996
 997     set_pbc_simd(pbc, pbc_simd);
 998
 999     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1000     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1001     {
1002         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1003          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1004          */
1005         iu = i;
1006         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1007         {
1008             type  = forceatoms[iu];
1009             ai[s] = forceatoms[iu+1];
1010             aj[s] = forceatoms[iu+2];
1011             ak[s] = forceatoms[iu+3];
1012
1013             /* At the end fill the arrays with the last atoms and 0 params */
1014             if (i + s*nfa1 < nbonds)
1015             {
1016                 coeff[s]                     = forceparams[type].harmonic.krA;
1017                 coeff[GMX_SIMD_REAL_WIDTH+s] = forceparams[type].harmonic.rA;
1018
1019                 if (iu + nfa1 < nbonds)
1020                 {
1021                     iu += nfa1;
1022                 }
1023             }
1024             else
1025             {
1026                 coeff[s]                     = 0;
1027                 coeff[GMX_SIMD_REAL_WIDTH+s] = 0;
1028             }
1029         }
1030
1031         /* Store the non PBC corrected distances packed and aligned */
1032         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1033         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1034         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1035         rijx_S = xi_S - xj_S;
1036         rijy_S = yi_S - yj_S;
1037         rijz_S = zi_S - zj_S;
1038         rkjx_S = xk_S - xj_S;
1039         rkjy_S = yk_S - yj_S;
1040         rkjz_S = zk_S - zj_S;
1041
1042         k_S       = load<SimdReal>(coeff);
1043         theta0_S  = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1044
1045         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1046         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1047
1048         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
1049                           rkjx_S, rkjy_S, rkjz_S);
1050
1051         nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
1052         nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
1053
1054         nrij_1_S  = invsqrt(nrij2_S);
1055         nrkj_1_S  = invsqrt(nrkj2_S);
1056
1057         cos_S     = rij_rkj_S * nrij_1_S * nrkj_1_S;
1058
1059         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1060          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1061          * This also ensures that rounding errors would cause the argument
1062          * of simdAcos to be < -1.
1063          * Note that we do not take precautions for cos(0)=1, so the outer
1064          * atoms in an angle should not be on top of each other.
1065          */
1066         cos_S     = max(cos_S, min_one_plus_eps_S);
1067
1068         theta_S   = acos(cos_S);
1069
1070         invsin_S  = invsqrt( one_S - cos_S * cos_S );
1071
1072         st_S      = k_S * (theta0_S - theta_S) * invsin_S;
1073         sth_S     = st_S * cos_S;
1074
1075         cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1076         cii_S     = sth_S * nrij_1_S * nrij_1_S;
1077         ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1078
1079         f_ix_S    = cii_S * rijx_S;
1080         f_ix_S    = fnma(cik_S, rkjx_S, f_ix_S);
1081         f_iy_S    = cii_S * rijy_S;
1082         f_iy_S    = fnma(cik_S, rkjy_S, f_iy_S);
1083         f_iz_S    = cii_S * rijz_S;
1084         f_iz_S    = fnma(cik_S, rkjz_S, f_iz_S);
1085         f_kx_S    = ckk_S * rkjx_S;
1086         f_kx_S    = fnma(cik_S, rijx_S, f_kx_S);
1087         f_ky_S    = ckk_S * rkjy_S;
1088         f_ky_S    = fnma(cik_S, rijy_S, f_ky_S);
1089         f_kz_S    = ckk_S * rkjz_S;
1090         f_kz_S    = fnma(cik_S, rijz_S, f_kz_S);
1091
1092         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1093         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1094         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1095     }
1096
1097     return 0;
1098 }
1099
1100 #endif // GMX_SIMD_HAVE_REAL
1101
1102 real linear_angles(int nbonds,
1103                    const t_iatom forceatoms[], const t_iparams forceparams[],
1104                    const rvec x[], rvec4 f[], rvec fshift[],
1105                    const t_pbc *pbc, const t_graph *g,
1106                    real lambda, real *dvdlambda,
1107                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1108                    int gmx_unused *global_atom_index)
1109 {
1110     int  i, m, ai, aj, ak, t1, t2, type;
1111     rvec f_i, f_j, f_k;
1112     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1113     ivec jt, dt_ij, dt_kj;
1114     rvec r_ij, r_kj, r_ik, dx;
1115
1116     L1   = 1-lambda;
1117     vtot = 0.0;
1118     for (i = 0; (i < nbonds); )
1119     {
1120         type = forceatoms[i++];
1121         ai   = forceatoms[i++];
1122         aj   = forceatoms[i++];
1123         ak   = forceatoms[i++];
1124
1125         kA   = forceparams[type].linangle.klinA;
1126         kB   = forceparams[type].linangle.klinB;
1127         klin = L1*kA + lambda*kB;
1128
1129         aA   = forceparams[type].linangle.aA;
1130         aB   = forceparams[type].linangle.aB;
1131         a    = L1*aA+lambda*aB;
1132         b    = 1-a;
1133
1134         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1135         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1136         rvec_sub(r_ij, r_kj, r_ik);
1137
1138         dr2 = 0;
1139         for (m = 0; (m < DIM); m++)
1140         {
1141             dr        = -a * r_ij[m] - b * r_kj[m];
1142             dr2      += dr*dr;
1143             dx[m]     = dr;
1144             f_i[m]    = a*klin*dr;
1145             f_k[m]    = b*klin*dr;
1146             f_j[m]    = -(f_i[m]+f_k[m]);
1147             f[ai][m] += f_i[m];
1148             f[aj][m] += f_j[m];
1149             f[ak][m] += f_k[m];
1150         }
1151         va          = 0.5*klin*dr2;
1152         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1153
1154         vtot += va;
1155
1156         if (g)
1157         {
1158             copy_ivec(SHIFT_IVEC(g, aj), jt);
1159
1160             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1161             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1162             t1 = IVEC2IS(dt_ij);
1163             t2 = IVEC2IS(dt_kj);
1164         }
1165         rvec_inc(fshift[t1], f_i);
1166         rvec_inc(fshift[CENTRAL], f_j);
1167         rvec_inc(fshift[t2], f_k);
1168     }                                         /* 57 TOTAL       */
1169     return vtot;
1170 }
1171
1172 template <BondedKernelFlavor flavor>
1173 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1174 urey_bradley(int nbonds,
1175              const t_iatom forceatoms[], const t_iparams forceparams[],
1176              const rvec x[], rvec4 f[], rvec fshift[],
1177              const t_pbc *pbc, const t_graph *g,
1178              real lambda, real *dvdlambda,
1179              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1180              int gmx_unused *global_atom_index)
1181 {
1182     int  i, m, ai, aj, ak, t1, t2, type, ki;
1183     rvec r_ij, r_kj, r_ik;
1184     real cos_theta, cos_theta2, theta;
1185     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1186     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1187     ivec jt, dt_ij, dt_kj, dt_ik;
1188
1189     vtot = 0.0;
1190     for (i = 0; (i < nbonds); )
1191     {
1192         type  = forceatoms[i++];
1193         ai    = forceatoms[i++];
1194         aj    = forceatoms[i++];
1195         ak    = forceatoms[i++];
1196         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1197         kthA  = forceparams[type].u_b.kthetaA;
1198         r13A  = forceparams[type].u_b.r13A;
1199         kUBA  = forceparams[type].u_b.kUBA;
1200         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1201         kthB  = forceparams[type].u_b.kthetaB;
1202         r13B  = forceparams[type].u_b.r13B;
1203         kUBB  = forceparams[type].u_b.kUBB;
1204
1205         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1206                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1207
1208         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1209         vtot       += va;
1210
1211         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1212         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1213         dr   = dr2*gmx::invsqrt(dr2);                                               /*  10              */
1214
1215         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1216
1217         cos_theta2 = gmx::square(cos_theta);                                        /*   1              */
1218         if (cos_theta2 < 1)
1219         {
1220             real st, sth;
1221             real cik, cii, ckk;
1222             real nrkj2, nrij2;
1223             rvec f_i, f_j, f_k;
1224
1225             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1226             sth   = st*cos_theta;                      /*   1           */
1227             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1228             nrij2 = iprod(r_ij, r_ij);
1229
1230             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1231             cii = sth/nrij2;                    /*  10          */
1232             ckk = sth/nrkj2;                    /*  10          */
1233
1234             for (m = 0; (m < DIM); m++)         /*  39          */
1235             {
1236                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1237                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1238                 f_j[m]    = -f_i[m]-f_k[m];
1239                 f[ai][m] += f_i[m];
1240                 f[aj][m] += f_j[m];
1241                 f[ak][m] += f_k[m];
1242             }
1243             if (g)
1244             {
1245                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1246
1247                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1248                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1249                 t1 = IVEC2IS(dt_ij);
1250                 t2 = IVEC2IS(dt_kj);
1251             }
1252             rvec_inc(fshift[t1], f_i);
1253             rvec_inc(fshift[CENTRAL], f_j);
1254             rvec_inc(fshift[t2], f_k);
1255         }                                       /* 161 TOTAL    */
1256         /* Time for the bond calculations */
1257         if (dr2 == 0.0)
1258         {
1259             continue;
1260         }
1261
1262         vtot  += vbond;             /* 1*/
1263         fbond *= gmx::invsqrt(dr2); /*   6              */
1264
1265         if (g)
1266         {
1267             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1268             ki = IVEC2IS(dt_ik);
1269         }
1270         for (m = 0; (m < DIM); m++)     /*  15          */
1271         {
1272             fik                 = fbond*r_ik[m];
1273             f[ai][m]           += fik;
1274             f[ak][m]           -= fik;
1275             fshift[ki][m]      += fik;
1276             fshift[CENTRAL][m] -= fik;
1277         }
1278     }
1279     return vtot;
1280 }
1281
1282 #if GMX_SIMD_HAVE_REAL
1283
1284 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1285  * This routines does not calculate energies and shift forces.
1286  */
1287 template <BondedKernelFlavor flavor>
1288 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1289 urey_bradley(int nbonds,
1290              const t_iatom forceatoms[], const t_iparams forceparams[],
1291              const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1292              const t_pbc *pbc, const t_graph gmx_unused *g,
1293              real gmx_unused lambda, real gmx_unused *dvdlambda,
1294              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1295              int gmx_unused *global_atom_index)
1296 {
1297     constexpr int            nfa1 = 4;
1298     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1299     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1300     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1301     alignas(GMX_SIMD_ALIGNMENT) real            coeff[4*GMX_SIMD_REAL_WIDTH];
1302     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1303
1304     set_pbc_simd(pbc, pbc_simd);
1305
1306     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1307     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH*nfa1)
1308     {
1309         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1310          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1311          */
1312         int iu = i;
1313         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1314         {
1315             const int type                     = forceatoms[iu];
1316             ai[s] = forceatoms[iu+1];
1317             aj[s] = forceatoms[iu+2];
1318             ak[s] = forceatoms[iu+3];
1319
1320             /* At the end fill the arrays with the last atoms and 0 params */
1321             if (i + s*nfa1 < nbonds)
1322             {
1323                 coeff[s]                       = forceparams[type].u_b.kthetaA;
1324                 coeff[GMX_SIMD_REAL_WIDTH+s]   = forceparams[type].u_b.thetaA;
1325                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = forceparams[type].u_b.kUBA;
1326                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = forceparams[type].u_b.r13A;
1327
1328                 if (iu + nfa1 < nbonds)
1329                 {
1330                     iu += nfa1;
1331                 }
1332             }
1333             else
1334             {
1335                 coeff[s]                       = 0;
1336                 coeff[GMX_SIMD_REAL_WIDTH+s]   = 0;
1337                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = 0;
1338                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = 0;
1339             }
1340         }
1341
1342         SimdReal xi_S, yi_S, zi_S;
1343         SimdReal xj_S, yj_S, zj_S;
1344         SimdReal xk_S, yk_S, zk_S;
1345
1346         /* Store the non PBC corrected distances packed and aligned */
1347         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1348         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1349         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1350         SimdReal       rijx_S = xi_S - xj_S;
1351         SimdReal       rijy_S = yi_S - yj_S;
1352         SimdReal       rijz_S = zi_S - zj_S;
1353         SimdReal       rkjx_S = xk_S - xj_S;
1354         SimdReal       rkjy_S = yk_S - yj_S;
1355         SimdReal       rkjz_S = zk_S - zj_S;
1356         SimdReal       rikx_S = xi_S - xk_S;
1357         SimdReal       riky_S = yi_S - yk_S;
1358         SimdReal       rikz_S = zi_S - zk_S;
1359
1360         const SimdReal ktheta_S = load<SimdReal>(coeff);
1361         const SimdReal theta0_S = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1362         const SimdReal kUB_S    = load<SimdReal>(coeff+2*GMX_SIMD_REAL_WIDTH);
1363         const SimdReal r13_S    = load<SimdReal>(coeff+3*GMX_SIMD_REAL_WIDTH);
1364
1365         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1366         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1367         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1368
1369         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
1370                                          rkjx_S, rkjy_S, rkjz_S);
1371
1372         const SimdReal dr2_S     = iprod(rikx_S, riky_S, rikz_S,
1373                                          rikx_S, riky_S, rikz_S);
1374
1375         const SimdReal nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
1376         const SimdReal nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
1377
1378         const SimdReal nrij_1_S  = invsqrt(nrij2_S);
1379         const SimdReal nrkj_1_S  = invsqrt(nrkj2_S);
1380         const SimdReal invdr2_S  = invsqrt(dr2_S);
1381         const SimdReal dr_S      = dr2_S*invdr2_S;
1382
1383         constexpr real min_one_plus_eps = -1.0 + 2.0*GMX_REAL_EPS; // Smallest number > -1
1384
1385         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1386          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1387          * This also ensures that rounding errors would cause the argument
1388          * of simdAcos to be < -1.
1389          * Note that we do not take precautions for cos(0)=1, so the bonds
1390          * in an angle should not align at an angle of 0 degrees.
1391          */
1392         const SimdReal cos_S     = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1393
1394         const SimdReal theta_S   = acos(cos_S);
1395         const SimdReal invsin_S  = invsqrt( 1.0 - cos_S * cos_S );
1396         const SimdReal st_S      = ktheta_S * (theta0_S - theta_S) * invsin_S;
1397         const SimdReal sth_S     = st_S * cos_S;
1398
1399         const SimdReal cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1400         const SimdReal cii_S     = sth_S * nrij_1_S * nrij_1_S;
1401         const SimdReal ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1402
1403         const SimdReal sUB_S     = kUB_S * (r13_S - dr_S) * invdr2_S;
1404
1405         const SimdReal f_ikx_S   = sUB_S * rikx_S;
1406         const SimdReal f_iky_S   = sUB_S * riky_S;
1407         const SimdReal f_ikz_S   = sUB_S * rikz_S;
1408
1409         const SimdReal f_ix_S    = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1410         const SimdReal f_iy_S    = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1411         const SimdReal f_iz_S    = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1412         const SimdReal f_kx_S    = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1413         const SimdReal f_ky_S    = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1414         const SimdReal f_kz_S    = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1415
1416         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1417         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1418         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1419     }
1420
1421     return 0;
1422 }
1423
1424 #endif // GMX_SIMD_HAVE_REAL
1425
1426 real quartic_angles(int nbonds,
1427                     const t_iatom forceatoms[], const t_iparams forceparams[],
1428                     const rvec x[], rvec4 f[], rvec fshift[],
1429                     const t_pbc *pbc, const t_graph *g,
1430                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1431                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1432                     int gmx_unused *global_atom_index)
1433 {
1434     int  i, j, ai, aj, ak, t1, t2, type;
1435     rvec r_ij, r_kj;
1436     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1437     ivec jt, dt_ij, dt_kj;
1438
1439     vtot = 0.0;
1440     for (i = 0; (i < nbonds); )
1441     {
1442         type = forceatoms[i++];
1443         ai   = forceatoms[i++];
1444         aj   = forceatoms[i++];
1445         ak   = forceatoms[i++];
1446
1447         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1448                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1449
1450         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1451
1452         dVdt = 0;
1453         va   = forceparams[type].qangle.c[0];
1454         dtp  = 1.0;
1455         for (j = 1; j <= 4; j++)
1456         {
1457             c     = forceparams[type].qangle.c[j];
1458             dVdt -= j*c*dtp;
1459             dtp  *= dt;
1460             va   += c*dtp;
1461         }
1462         /* 20 */
1463
1464         vtot += va;
1465
1466         cos_theta2 = gmx::square(cos_theta);            /*   1          */
1467         if (cos_theta2 < 1)
1468         {
1469             int  m;
1470             real st, sth;
1471             real cik, cii, ckk;
1472             real nrkj2, nrij2;
1473             rvec f_i, f_j, f_k;
1474
1475             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1476             sth   = st*cos_theta;                      /*   1           */
1477             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1478             nrij2 = iprod(r_ij, r_ij);
1479
1480             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1481             cii = sth/nrij2;                    /*  10          */
1482             ckk = sth/nrkj2;                    /*  10          */
1483
1484             for (m = 0; (m < DIM); m++)         /*  39          */
1485             {
1486                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1487                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1488                 f_j[m]    = -f_i[m]-f_k[m];
1489                 f[ai][m] += f_i[m];
1490                 f[aj][m] += f_j[m];
1491                 f[ak][m] += f_k[m];
1492             }
1493             if (g)
1494             {
1495                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1496
1497                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1498                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1499                 t1 = IVEC2IS(dt_ij);
1500                 t2 = IVEC2IS(dt_kj);
1501             }
1502             rvec_inc(fshift[t1], f_i);
1503             rvec_inc(fshift[CENTRAL], f_j);
1504             rvec_inc(fshift[t2], f_k);
1505         }                                       /* 153 TOTAL    */
1506     }
1507     return vtot;
1508 }
1509
1510
1511 #if GMX_SIMD_HAVE_REAL
1512
1513 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1514  * also calculates the pre-factor required for the dihedral force update.
1515  * Note that bv and buf should be register aligned.
1516  */
1517 inline void
1518 dih_angle_simd(const rvec *x,
1519                const int *ai, const int *aj, const int *ak, const int *al,
1520                const real *pbc_simd,
1521                SimdReal *phi_S,
1522                SimdReal *mx_S, SimdReal *my_S, SimdReal *mz_S,
1523                SimdReal *nx_S, SimdReal *ny_S, SimdReal *nz_S,
1524                SimdReal *nrkj_m2_S,
1525                SimdReal *nrkj_n2_S,
1526                SimdReal *p_S,
1527                SimdReal *q_S)
1528 {
1529     SimdReal xi_S, yi_S, zi_S;
1530     SimdReal xj_S, yj_S, zj_S;
1531     SimdReal xk_S, yk_S, zk_S;
1532     SimdReal xl_S, yl_S, zl_S;
1533     SimdReal rijx_S, rijy_S, rijz_S;
1534     SimdReal rkjx_S, rkjy_S, rkjz_S;
1535     SimdReal rklx_S, rkly_S, rklz_S;
1536     SimdReal cx_S, cy_S, cz_S;
1537     SimdReal cn_S;
1538     SimdReal s_S;
1539     SimdReal ipr_S;
1540     SimdReal iprm_S, iprn_S;
1541     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1542     SimdReal toler_S;
1543     SimdReal nrkj2_min_S;
1544     SimdReal real_eps_S;
1545
1546     /* Used to avoid division by zero.
1547      * We take into acount that we multiply the result by real_eps_S.
1548      */
1549     nrkj2_min_S = SimdReal(GMX_REAL_MIN/(2*GMX_REAL_EPS));
1550
1551     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1552     real_eps_S  = SimdReal(2*GMX_REAL_EPS);
1553
1554     /* Store the non PBC corrected distances packed and aligned */
1555     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1556     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1557     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1558     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), al, &xl_S, &yl_S, &zl_S);
1559     rijx_S = xi_S - xj_S;
1560     rijy_S = yi_S - yj_S;
1561     rijz_S = zi_S - zj_S;
1562     rkjx_S = xk_S - xj_S;
1563     rkjy_S = yk_S - yj_S;
1564     rkjz_S = zk_S - zj_S;
1565     rklx_S = xk_S - xl_S;
1566     rkly_S = yk_S - yl_S;
1567     rklz_S = zk_S - zl_S;
1568
1569     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1570     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1571     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1572
1573     cprod(rijx_S, rijy_S, rijz_S,
1574           rkjx_S, rkjy_S, rkjz_S,
1575           mx_S, my_S, mz_S);
1576
1577     cprod(rkjx_S, rkjy_S, rkjz_S,
1578           rklx_S, rkly_S, rklz_S,
1579           nx_S, ny_S, nz_S);
1580
1581     cprod(*mx_S, *my_S, *mz_S,
1582           *nx_S, *ny_S, *nz_S,
1583           &cx_S, &cy_S, &cz_S);
1584
1585     cn_S       = sqrt(norm2(cx_S, cy_S, cz_S));
1586
1587     s_S        = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1588
1589     /* Determine the dihedral angle, the sign might need correction */
1590     *phi_S     = atan2(cn_S, s_S);
1591
1592     ipr_S      = iprod(rijx_S, rijy_S, rijz_S,
1593                        *nx_S, *ny_S, *nz_S);
1594
1595     iprm_S     = norm2(*mx_S, *my_S, *mz_S);
1596     iprn_S     = norm2(*nx_S, *ny_S, *nz_S);
1597
1598     nrkj2_S    = norm2(rkjx_S, rkjy_S, rkjz_S);
1599
1600     /* Avoid division by zero. When zero, the result is multiplied by 0
1601      * anyhow, so the 3 max below do not affect the final result.
1602      */
1603     nrkj2_S    = max(nrkj2_S, nrkj2_min_S);
1604     nrkj_1_S   = invsqrt(nrkj2_S);
1605     nrkj_2_S   = nrkj_1_S * nrkj_1_S;
1606     nrkj_S     = nrkj2_S * nrkj_1_S;
1607
1608     toler_S    = nrkj2_S * real_eps_S;
1609
1610     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1611      * So we take a max with the tolerance instead. Since we multiply with
1612      * m or n later, the max does not affect the results.
1613      */
1614     iprm_S     = max(iprm_S, toler_S);
1615     iprn_S     = max(iprn_S, toler_S);
1616     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1617     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1618
1619     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1620     *phi_S     = copysign(*phi_S, ipr_S);
1621     *p_S       = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1622     *p_S       = *p_S * nrkj_2_S;
1623
1624     *q_S       = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1625     *q_S       = *q_S * nrkj_2_S;
1626 }
1627
1628 #endif // GMX_SIMD_HAVE_REAL
1629
1630 }      // namespace
1631
1632 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1633                 rvec r_ij, rvec r_kj, rvec r_kl,
1634                 rvec m, rvec n, rvec4 f[], rvec fshift[],
1635                 const t_pbc *pbc, const t_graph *g,
1636                 const rvec x[], int t1, int t2, int t3)
1637 {
1638     /* 143 FLOPS */
1639     rvec f_i, f_j, f_k, f_l;
1640     rvec uvec, vvec, svec, dx_jl;
1641     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1642     real a, b, p, q, toler;
1643     ivec jt, dt_ij, dt_kj, dt_lj;
1644
1645     iprm  = iprod(m, m);       /*  5    */
1646     iprn  = iprod(n, n);       /*  5    */
1647     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1648     toler = nrkj2*GMX_REAL_EPS;
1649     if ((iprm > toler) && (iprn > toler))
1650     {
1651         nrkj_1 = gmx::invsqrt(nrkj2); /* 10     */
1652         nrkj_2 = nrkj_1*nrkj_1;       /*  1     */
1653         nrkj   = nrkj2*nrkj_1;        /*  1     */
1654         a      = -ddphi*nrkj/iprm;    /* 11     */
1655         svmul(a, m, f_i);             /*  3     */
1656         b     = ddphi*nrkj/iprn;      /* 11     */
1657         svmul(b, n, f_l);             /*  3  */
1658         p     = iprod(r_ij, r_kj);    /*  5     */
1659         p    *= nrkj_2;               /*  1     */
1660         q     = iprod(r_kl, r_kj);    /*  5     */
1661         q    *= nrkj_2;               /*  1     */
1662         svmul(p, f_i, uvec);          /*  3     */
1663         svmul(q, f_l, vvec);          /*  3     */
1664         rvec_sub(uvec, vvec, svec);   /*  3     */
1665         rvec_sub(f_i, svec, f_j);     /*  3     */
1666         rvec_add(f_l, svec, f_k);     /*  3     */
1667         rvec_inc(f[i], f_i);          /*  3     */
1668         rvec_dec(f[j], f_j);          /*  3     */
1669         rvec_dec(f[k], f_k);          /*  3     */
1670         rvec_inc(f[l], f_l);          /*  3     */
1671
1672         if (g)
1673         {
1674             copy_ivec(SHIFT_IVEC(g, j), jt);
1675             ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1676             ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1677             ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1678             t1 = IVEC2IS(dt_ij);
1679             t2 = IVEC2IS(dt_kj);
1680             t3 = IVEC2IS(dt_lj);
1681         }
1682         else if (pbc)
1683         {
1684             t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1685         }
1686         else
1687         {
1688             t3 = CENTRAL;
1689         }
1690
1691         rvec_inc(fshift[t1], f_i);
1692         rvec_dec(fshift[CENTRAL], f_j);
1693         rvec_dec(fshift[t2], f_k);
1694         rvec_inc(fshift[t3], f_l);
1695     }
1696     /* 112 TOTAL    */
1697 }
1698
1699 namespace
1700 {
1701
1702 /*! \brief As do_dih_fup above, but without shift forces */
1703 void
1704 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1705                     rvec r_ij, rvec r_kj, rvec r_kl,
1706                     rvec m, rvec n, rvec4 f[])
1707 {
1708     rvec f_i, f_j, f_k, f_l;
1709     rvec uvec, vvec, svec;
1710     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1711     real a, b, p, q, toler;
1712
1713     iprm  = iprod(m, m);       /*  5    */
1714     iprn  = iprod(n, n);       /*  5    */
1715     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1716     toler = nrkj2*GMX_REAL_EPS;
1717     if ((iprm > toler) && (iprn > toler))
1718     {
1719         nrkj_1 = gmx::invsqrt(nrkj2); /* 10     */
1720         nrkj_2 = nrkj_1*nrkj_1;       /*  1     */
1721         nrkj   = nrkj2*nrkj_1;        /*  1     */
1722         a      = -ddphi*nrkj/iprm;    /* 11     */
1723         svmul(a, m, f_i);             /*  3     */
1724         b     = ddphi*nrkj/iprn;      /* 11     */
1725         svmul(b, n, f_l);             /*  3  */
1726         p     = iprod(r_ij, r_kj);    /*  5     */
1727         p    *= nrkj_2;               /*  1     */
1728         q     = iprod(r_kl, r_kj);    /*  5     */
1729         q    *= nrkj_2;               /*  1     */
1730         svmul(p, f_i, uvec);          /*  3     */
1731         svmul(q, f_l, vvec);          /*  3     */
1732         rvec_sub(uvec, vvec, svec);   /*  3     */
1733         rvec_sub(f_i, svec, f_j);     /*  3     */
1734         rvec_add(f_l, svec, f_k);     /*  3     */
1735         rvec_inc(f[i], f_i);          /*  3     */
1736         rvec_dec(f[j], f_j);          /*  3     */
1737         rvec_dec(f[k], f_k);          /*  3     */
1738         rvec_inc(f[l], f_l);          /*  3     */
1739     }
1740 }
1741
1742 #if GMX_SIMD_HAVE_REAL
1743 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1744 inline void gmx_simdcall
1745 do_dih_fup_noshiftf_simd(const int *ai, const int *aj, const int *ak, const int *al,
1746                          SimdReal p, SimdReal q,
1747                          SimdReal f_i_x,  SimdReal f_i_y,  SimdReal f_i_z,
1748                          SimdReal mf_l_x, SimdReal mf_l_y, SimdReal mf_l_z,
1749                          rvec4 f[])
1750 {
1751     SimdReal sx    = p * f_i_x + q * mf_l_x;
1752     SimdReal sy    = p * f_i_y + q * mf_l_y;
1753     SimdReal sz    = p * f_i_z + q * mf_l_z;
1754     SimdReal f_j_x = f_i_x - sx;
1755     SimdReal f_j_y = f_i_y - sy;
1756     SimdReal f_j_z = f_i_z - sz;
1757     SimdReal f_k_x = mf_l_x - sx;
1758     SimdReal f_k_y = mf_l_y - sy;
1759     SimdReal f_k_z = mf_l_z - sz;
1760     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_i_x, f_i_y, f_i_z);
1761     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_j_x, f_j_y, f_j_z);
1762     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_k_x, f_k_y, f_k_z);
1763     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), al, mf_l_x, mf_l_y, mf_l_z);
1764 }
1765 #endif // GMX_SIMD_HAVE_REAL
1766
1767 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1768              real phi, real lambda, real *V, real *F)
1769 {
1770     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1771     real L1   = 1.0 - lambda;
1772     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1773     real dph0 = (phiB - phiA)*DEG2RAD;
1774     real cp   = L1*cpA + lambda*cpB;
1775
1776     mdphi =  mult*phi - ph0;
1777     sdphi = std::sin(mdphi);
1778     ddphi = -cp*mult*sdphi;
1779     v1    = 1.0 + std::cos(mdphi);
1780     v     = cp*v1;
1781
1782     dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1783
1784     *V = v;
1785     *F = ddphi;
1786
1787     return dvdlambda;
1788
1789     /* That was 40 flops */
1790 }
1791
1792 void
1793 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1794                real phi, real lambda, real *F)
1795 {
1796     real mdphi, sdphi, ddphi;
1797     real L1   = 1.0 - lambda;
1798     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1799     real cp   = L1*cpA + lambda*cpB;
1800
1801     mdphi = mult*phi - ph0;
1802     sdphi = std::sin(mdphi);
1803     ddphi = -cp*mult*sdphi;
1804
1805     *F = ddphi;
1806
1807     /* That was 20 flops */
1808 }
1809
1810 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1811 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1812                  real phi, real lambda, real *V, real *F)
1813 {
1814     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1815     real L1   = 1.0 - lambda;
1816     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1817     real dph0 = (phiB - phiA)*DEG2RAD;
1818     real cp   = L1*cpA + lambda*cpB;
1819
1820     mdphi = mult*(phi-ph0);
1821     sdphi = std::sin(mdphi);
1822     ddphi = cp*mult*sdphi;
1823     v1    = 1.0-std::cos(mdphi);
1824     v     = cp*v1;
1825
1826     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1827
1828     *V = v;
1829     *F = ddphi;
1830
1831     return dvdlambda;
1832
1833     /* That was 40 flops */
1834 }
1835
1836 template <BondedKernelFlavor flavor>
1837 std::enable_if_t<flavor == BondedKernelFlavor::ForcesAndVirialAndEnergy, real>
1838 pdihs(int nbonds,
1839       const t_iatom forceatoms[], const t_iparams forceparams[],
1840       const rvec x[], rvec4 f[], rvec fshift[],
1841       const t_pbc *pbc, const t_graph *g,
1842       real lambda, real *dvdlambda,
1843       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1844       int gmx_unused *global_atom_index)
1845 {
1846     int  i, type, ai, aj, ak, al;
1847     int  t1, t2, t3;
1848     rvec r_ij, r_kj, r_kl, m, n;
1849     real phi, ddphi, vpd, vtot;
1850
1851     vtot = 0.0;
1852
1853     for (i = 0; (i < nbonds); )
1854     {
1855         type = forceatoms[i++];
1856         ai   = forceatoms[i++];
1857         aj   = forceatoms[i++];
1858         ak   = forceatoms[i++];
1859         al   = forceatoms[i++];
1860
1861         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1862                         &t1, &t2, &t3);  /*  84      */
1863         *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1864                               forceparams[type].pdihs.cpB,
1865                               forceparams[type].pdihs.phiA,
1866                               forceparams[type].pdihs.phiB,
1867                               forceparams[type].pdihs.mult,
1868                               phi, lambda, &vpd, &ddphi);
1869
1870         vtot += vpd;
1871         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1872                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1873
1874     }                                                 /* 223 TOTAL  */
1875
1876     return vtot;
1877 }
1878
1879 /* As pdihs above, but without calculating energies and shift forces */
1880 template <BondedKernelFlavor flavor>
1881 std::enable_if_t<flavor == BondedKernelFlavor::ForcesNoSimd || (!GMX_SIMD_HAVE_REAL &&flavor == BondedKernelFlavor::ForcesSimdWhenAvailable), real>
1882 pdihs(int nbonds,
1883       const t_iatom forceatoms[], const t_iparams forceparams[],
1884       const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1885       const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
1886       real lambda, real gmx_unused *dvdlambda,
1887       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1888       int gmx_unused *global_atom_index)
1889 {
1890     int  i, type, ai, aj, ak, al;
1891     int  t1, t2, t3;
1892     rvec r_ij, r_kj, r_kl, m, n;
1893     real phi, ddphi_tot, ddphi;
1894
1895     for (i = 0; (i < nbonds); )
1896     {
1897         ai   = forceatoms[i+1];
1898         aj   = forceatoms[i+2];
1899         ak   = forceatoms[i+3];
1900         al   = forceatoms[i+4];
1901
1902         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1903                         &t1, &t2, &t3);
1904
1905         ddphi_tot = 0;
1906
1907         /* Loop over dihedrals working on the same atoms,
1908          * so we avoid recalculating angles and force distributions.
1909          */
1910         do
1911         {
1912             type = forceatoms[i];
1913             dopdihs_noener(forceparams[type].pdihs.cpA,
1914                            forceparams[type].pdihs.cpB,
1915                            forceparams[type].pdihs.phiA,
1916                            forceparams[type].pdihs.phiB,
1917                            forceparams[type].pdihs.mult,
1918                            phi, lambda, &ddphi);
1919             ddphi_tot += ddphi;
1920
1921             i += 5;
1922         }
1923         while (i < nbonds &&
1924                forceatoms[i+1] == ai &&
1925                forceatoms[i+2] == aj &&
1926                forceatoms[i+3] == ak &&
1927                forceatoms[i+4] == al);
1928
1929         do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1930     }
1931
1932     return 0;
1933 }
1934
1935
1936 #if GMX_SIMD_HAVE_REAL
1937
1938 /* As pdihs above, but using SIMD to calculate many dihedrals at once */
1939 template <BondedKernelFlavor flavor>
1940 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1941 pdihs(int nbonds,
1942       const t_iatom forceatoms[], const t_iparams forceparams[],
1943       const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1944       const t_pbc *pbc, const t_graph gmx_unused *g,
1945       real gmx_unused lambda, real gmx_unused *dvdlambda,
1946       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1947       int gmx_unused *global_atom_index)
1948 {
1949     const int             nfa1 = 5;
1950     int                   i, iu, s;
1951     int                   type;
1952     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1953     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1954     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1955     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    al[GMX_SIMD_REAL_WIDTH];
1956     alignas(GMX_SIMD_ALIGNMENT) real            buf[3*GMX_SIMD_REAL_WIDTH];
1957     real                 *cp, *phi0, *mult;
1958     SimdReal              deg2rad_S(DEG2RAD);
1959     SimdReal              p_S, q_S;
1960     SimdReal              phi0_S, phi_S;
1961     SimdReal              mx_S, my_S, mz_S;
1962     SimdReal              nx_S, ny_S, nz_S;
1963     SimdReal              nrkj_m2_S, nrkj_n2_S;
1964     SimdReal              cp_S, mdphi_S, mult_S;
1965     SimdReal              sin_S, cos_S;
1966     SimdReal              mddphi_S;
1967     SimdReal              sf_i_S, msf_l_S;
1968     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1969
1970     /* Extract aligned pointer for parameters and variables */
1971     cp    = buf + 0*GMX_SIMD_REAL_WIDTH;
1972     phi0  = buf + 1*GMX_SIMD_REAL_WIDTH;
1973     mult  = buf + 2*GMX_SIMD_REAL_WIDTH;
1974
1975     set_pbc_simd(pbc, pbc_simd);
1976
1977     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1978     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1979     {
1980         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1981          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1982          */
1983         iu = i;
1984         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1985         {
1986             type  = forceatoms[iu];
1987             ai[s] = forceatoms[iu+1];
1988             aj[s] = forceatoms[iu+2];
1989             ak[s] = forceatoms[iu+3];
1990             al[s] = forceatoms[iu+4];
1991
1992             /* At the end fill the arrays with the last atoms and 0 params */
1993             if (i + s*nfa1 < nbonds)
1994             {
1995                 cp[s]   = forceparams[type].pdihs.cpA;
1996                 phi0[s] = forceparams[type].pdihs.phiA;
1997                 mult[s] = forceparams[type].pdihs.mult;
1998
1999                 if (iu + nfa1 < nbonds)
2000                 {
2001                     iu += nfa1;
2002                 }
2003             }
2004             else
2005             {
2006                 cp[s]   = 0;
2007                 phi0[s] = 0;
2008                 mult[s] = 0;
2009             }
2010         }
2011
2012         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2013         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
2014                        &phi_S,
2015                        &mx_S, &my_S, &mz_S,
2016                        &nx_S, &ny_S, &nz_S,
2017                        &nrkj_m2_S,
2018                        &nrkj_n2_S,
2019                        &p_S, &q_S);
2020
2021         cp_S     = load<SimdReal>(cp);
2022         phi0_S   = load<SimdReal>(phi0) * deg2rad_S;
2023         mult_S   = load<SimdReal>(mult);
2024
2025         mdphi_S  = fms(mult_S, phi_S, phi0_S);
2026
2027         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2028         sincos(mdphi_S, &sin_S, &cos_S);
2029         mddphi_S = cp_S * mult_S * sin_S;
2030         sf_i_S   = mddphi_S * nrkj_m2_S;
2031         msf_l_S  = mddphi_S * nrkj_n2_S;
2032
2033         /* After this m?_S will contain f[i] */
2034         mx_S     = sf_i_S * mx_S;
2035         my_S     = sf_i_S * my_S;
2036         mz_S     = sf_i_S * mz_S;
2037
2038         /* After this m?_S will contain -f[l] */
2039         nx_S     = msf_l_S * nx_S;
2040         ny_S     = msf_l_S * ny_S;
2041         nz_S     = msf_l_S * nz_S;
2042
2043         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
2044                                  p_S, q_S,
2045                                  mx_S, my_S, mz_S,
2046                                  nx_S, ny_S, nz_S,
2047                                  f);
2048     }
2049
2050     return 0;
2051 }
2052
2053 /* This is mostly a copy of pdihs_noener_simd above, but with using
2054  * the RB potential instead of a harmonic potential.
2055  * This function can replace rbdihs() when no energy and virial are needed.
2056  */
2057 template <BondedKernelFlavor flavor>
2058 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2059 rbdihs(int nbonds,
2060        const t_iatom forceatoms[], const t_iparams forceparams[],
2061        const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
2062        const t_pbc *pbc, const t_graph gmx_unused *g,
2063        real gmx_unused lambda, real gmx_unused *dvdlambda,
2064        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2065        int gmx_unused *global_atom_index)
2066 {
2067     const int             nfa1 = 5;
2068     int                   i, iu, s, j;
2069     int                   type;
2070     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ai[GMX_SIMD_REAL_WIDTH];
2071     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  aj[GMX_SIMD_REAL_WIDTH];
2072     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ak[GMX_SIMD_REAL_WIDTH];
2073     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  al[GMX_SIMD_REAL_WIDTH];
2074     alignas(GMX_SIMD_ALIGNMENT) real          parm[NR_RBDIHS*GMX_SIMD_REAL_WIDTH];
2075
2076     SimdReal              p_S, q_S;
2077     SimdReal              phi_S;
2078     SimdReal              ddphi_S, cosfac_S;
2079     SimdReal              mx_S, my_S, mz_S;
2080     SimdReal              nx_S, ny_S, nz_S;
2081     SimdReal              nrkj_m2_S, nrkj_n2_S;
2082     SimdReal              parm_S, c_S;
2083     SimdReal              sin_S, cos_S;
2084     SimdReal              sf_i_S, msf_l_S;
2085     alignas(GMX_SIMD_ALIGNMENT) real          pbc_simd[9*GMX_SIMD_REAL_WIDTH];
2086
2087     SimdReal              pi_S(M_PI);
2088     SimdReal              one_S(1.0);
2089
2090     set_pbc_simd(pbc, pbc_simd);
2091
2092     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2093     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
2094     {
2095         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2096          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2097          */
2098         iu = i;
2099         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2100         {
2101             type  = forceatoms[iu];
2102             ai[s] = forceatoms[iu+1];
2103             aj[s] = forceatoms[iu+2];
2104             ak[s] = forceatoms[iu+3];
2105             al[s] = forceatoms[iu+4];
2106
2107             /* At the end fill the arrays with the last atoms and 0 params */
2108             if (i + s*nfa1 < nbonds)
2109             {
2110                 /* We don't need the first parameter, since that's a constant
2111                  * which only affects the energies, not the forces.
2112                  */
2113                 for (j = 1; j < NR_RBDIHS; j++)
2114                 {
2115                     parm[j*GMX_SIMD_REAL_WIDTH + s] =
2116                         forceparams[type].rbdihs.rbcA[j];
2117                 }
2118
2119                 if (iu + nfa1 < nbonds)
2120                 {
2121                     iu += nfa1;
2122                 }
2123             }
2124             else
2125             {
2126                 for (j = 1; j < NR_RBDIHS; j++)
2127                 {
2128                     parm[j*GMX_SIMD_REAL_WIDTH + s] = 0;
2129                 }
2130             }
2131         }
2132
2133         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2134         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
2135                        &phi_S,
2136                        &mx_S, &my_S, &mz_S,
2137                        &nx_S, &ny_S, &nz_S,
2138                        &nrkj_m2_S,
2139                        &nrkj_n2_S,
2140                        &p_S, &q_S);
2141
2142         /* Change to polymer convention */
2143         phi_S = phi_S - pi_S;
2144
2145         sincos(phi_S, &sin_S, &cos_S);
2146
2147         ddphi_S   = setZero();
2148         c_S       = one_S;
2149         cosfac_S  = one_S;
2150         for (j = 1; j < NR_RBDIHS; j++)
2151         {
2152             parm_S   = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
2153             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2154             cosfac_S = cosfac_S * cos_S;
2155             c_S      = c_S + one_S;
2156         }
2157
2158         /* Note that here we do not use the minus sign which is present
2159          * in the normal RB code. This is corrected for through (m)sf below.
2160          */
2161         ddphi_S  = ddphi_S * sin_S;
2162
2163         sf_i_S   = ddphi_S * nrkj_m2_S;
2164         msf_l_S  = ddphi_S * nrkj_n2_S;
2165
2166         /* After this m?_S will contain f[i] */
2167         mx_S     = sf_i_S * mx_S;
2168         my_S     = sf_i_S * my_S;
2169         mz_S     = sf_i_S * mz_S;
2170
2171         /* After this m?_S will contain -f[l] */
2172         nx_S     = msf_l_S * nx_S;
2173         ny_S     = msf_l_S * ny_S;
2174         nz_S     = msf_l_S * nz_S;
2175
2176         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
2177                                  p_S, q_S,
2178                                  mx_S, my_S, mz_S,
2179                                  nx_S, ny_S, nz_S,
2180                                  f);
2181     }
2182
2183     return 0;
2184 }
2185
2186 #endif // GMX_SIMD_HAVE_REAL
2187
2188
2189 real idihs(int nbonds,
2190            const t_iatom forceatoms[], const t_iparams forceparams[],
2191            const rvec x[], rvec4 f[], rvec fshift[],
2192            const t_pbc *pbc, const t_graph *g,
2193            real lambda, real *dvdlambda,
2194            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2195            int gmx_unused *global_atom_index)
2196 {
2197     int  i, type, ai, aj, ak, al;
2198     int  t1, t2, t3;
2199     real phi, phi0, dphi0, ddphi, vtot;
2200     rvec r_ij, r_kj, r_kl, m, n;
2201     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2202
2203     L1        = 1.0-lambda;
2204     dvdl_term = 0;
2205     vtot      = 0.0;
2206     for (i = 0; (i < nbonds); )
2207     {
2208         type = forceatoms[i++];
2209         ai   = forceatoms[i++];
2210         aj   = forceatoms[i++];
2211         ak   = forceatoms[i++];
2212         al   = forceatoms[i++];
2213
2214         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2215                         &t1, &t2, &t3);  /*  84         */
2216
2217         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2218          * force changes if we just apply a normal harmonic.
2219          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2220          * This means we will never have the periodicity problem, unless
2221          * the dihedral is Pi away from phiO, which is very unlikely due to
2222          * the potential.
2223          */
2224         kA = forceparams[type].harmonic.krA;
2225         kB = forceparams[type].harmonic.krB;
2226         pA = forceparams[type].harmonic.rA;
2227         pB = forceparams[type].harmonic.rB;
2228
2229         kk    = L1*kA + lambda*kB;
2230         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2231         dphi0 = (pB - pA)*DEG2RAD;
2232
2233         dp = phi-phi0;
2234
2235         make_dp_periodic(&dp);
2236
2237         dp2 = dp*dp;
2238
2239         vtot += 0.5*kk*dp2;
2240         ddphi = -kk*dp;
2241
2242         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2243
2244         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
2245                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2246         /* 218 TOTAL    */
2247     }
2248
2249     *dvdlambda += dvdl_term;
2250     return vtot;
2251 }
2252
2253 /*! \brief Computes angle restraints of two different types */
2254 real low_angres(int nbonds,
2255                 const t_iatom forceatoms[], const t_iparams forceparams[],
2256                 const rvec x[], rvec4 f[], rvec fshift[],
2257                 const t_pbc *pbc, const t_graph *g,
2258                 real lambda, real *dvdlambda,
2259                 gmx_bool bZAxis)
2260 {
2261     int  i, m, type, ai, aj, ak, al;
2262     int  t1, t2;
2263     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2264     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2265     real st, sth, nrij2, nrkl2, c, cij, ckl;
2266
2267     ivec dt;
2268     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2269
2270     vtot = 0.0;
2271     ak   = al = 0; /* to avoid warnings */
2272     for (i = 0; i < nbonds; )
2273     {
2274         type = forceatoms[i++];
2275         ai   = forceatoms[i++];
2276         aj   = forceatoms[i++];
2277         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2278         if (!bZAxis)
2279         {
2280             ak   = forceatoms[i++];
2281             al   = forceatoms[i++];
2282             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2283         }
2284         else
2285         {
2286             r_kl[XX] = 0;
2287             r_kl[YY] = 0;
2288             r_kl[ZZ] = 1;
2289         }
2290
2291         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2292         phi     = std::acos(cos_phi);    /* 10           */
2293
2294         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2295                                   forceparams[type].pdihs.cpB,
2296                                   forceparams[type].pdihs.phiA,
2297                                   forceparams[type].pdihs.phiB,
2298                                   forceparams[type].pdihs.mult,
2299                                   phi, lambda, &vid, &dVdphi); /*  40  */
2300
2301         vtot += vid;
2302
2303         cos_phi2 = gmx::square(cos_phi);                /*   1          */
2304         if (cos_phi2 < 1)
2305         {
2306             st    = -dVdphi*gmx::invsqrt(1 - cos_phi2); /*  12          */
2307             sth   = st*cos_phi;                         /*   1          */
2308             nrij2 = iprod(r_ij, r_ij);                  /*   5          */
2309             nrkl2 = iprod(r_kl, r_kl);                  /*   5          */
2310
2311             c   = st*gmx::invsqrt(nrij2*nrkl2);         /*  11          */
2312             cij = sth/nrij2;                            /*  10          */
2313             ckl = sth/nrkl2;                            /*  10          */
2314
2315             for (m = 0; m < DIM; m++)                   /*  18+18       */
2316             {
2317                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2318                 f[ai][m] += f_i[m];
2319                 f[aj][m] -= f_i[m];
2320                 if (!bZAxis)
2321                 {
2322                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2323                     f[ak][m] += f_k[m];
2324                     f[al][m] -= f_k[m];
2325                 }
2326             }
2327
2328             if (g)
2329             {
2330                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2331                 t1 = IVEC2IS(dt);
2332             }
2333             rvec_inc(fshift[t1], f_i);
2334             rvec_dec(fshift[CENTRAL], f_i);
2335             if (!bZAxis)
2336             {
2337                 if (g)
2338                 {
2339                     ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2340                     t2 = IVEC2IS(dt);
2341                 }
2342                 rvec_inc(fshift[t2], f_k);
2343                 rvec_dec(fshift[CENTRAL], f_k);
2344             }
2345         }
2346     }
2347
2348     return vtot; /*  184 / 157 (bZAxis)  total  */
2349 }
2350
2351 real angres(int nbonds,
2352             const t_iatom forceatoms[], const t_iparams forceparams[],
2353             const rvec x[], rvec4 f[], rvec fshift[],
2354             const t_pbc *pbc, const t_graph *g,
2355             real lambda, real *dvdlambda,
2356             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2357             int gmx_unused *global_atom_index)
2358 {
2359     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2360                       lambda, dvdlambda, FALSE);
2361 }
2362
2363 real angresz(int nbonds,
2364              const t_iatom forceatoms[], const t_iparams forceparams[],
2365              const rvec x[], rvec4 f[], rvec fshift[],
2366              const t_pbc *pbc, const t_graph *g,
2367              real lambda, real *dvdlambda,
2368              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2369              int gmx_unused *global_atom_index)
2370 {
2371     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2372                       lambda, dvdlambda, TRUE);
2373 }
2374
2375 real dihres(int nbonds,
2376             const t_iatom forceatoms[], const t_iparams forceparams[],
2377             const rvec x[], rvec4 f[], rvec fshift[],
2378             const t_pbc *pbc, const t_graph *g,
2379             real lambda, real *dvdlambda,
2380             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2381             int gmx_unused  *global_atom_index)
2382 {
2383     real vtot = 0;
2384     int  ai, aj, ak, al, i, type, t1, t2, t3;
2385     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2386     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2387     rvec r_ij, r_kj, r_kl, m, n;
2388
2389     L1 = 1.0-lambda;
2390
2391     d2r = DEG2RAD;
2392
2393     for (i = 0; (i < nbonds); )
2394     {
2395         type = forceatoms[i++];
2396         ai   = forceatoms[i++];
2397         aj   = forceatoms[i++];
2398         ak   = forceatoms[i++];
2399         al   = forceatoms[i++];
2400
2401         phi0A  = forceparams[type].dihres.phiA*d2r;
2402         dphiA  = forceparams[type].dihres.dphiA*d2r;
2403         kfacA  = forceparams[type].dihres.kfacA;
2404
2405         phi0B  = forceparams[type].dihres.phiB*d2r;
2406         dphiB  = forceparams[type].dihres.dphiB*d2r;
2407         kfacB  = forceparams[type].dihres.kfacB;
2408
2409         phi0  = L1*phi0A + lambda*phi0B;
2410         dphi  = L1*dphiA + lambda*dphiB;
2411         kfac  = L1*kfacA + lambda*kfacB;
2412
2413         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2414                         &t1, &t2, &t3);
2415         /* 84 flops */
2416
2417         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2418          * force changes if we just apply a normal harmonic.
2419          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2420          * This means we will never have the periodicity problem, unless
2421          * the dihedral is Pi away from phiO, which is very unlikely due to
2422          * the potential.
2423          */
2424         dp = phi-phi0;
2425         make_dp_periodic(&dp);
2426
2427         if (dp > dphi)
2428         {
2429             ddp = dp-dphi;
2430         }
2431         else if (dp < -dphi)
2432         {
2433             ddp = dp+dphi;
2434         }
2435         else
2436         {
2437             ddp = 0;
2438         }
2439
2440         if (ddp != 0.0)
2441         {
2442             ddp2  = ddp*ddp;
2443             vtot += 0.5*kfac*ddp2;
2444             ddphi = kfac*ddp;
2445
2446             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2447             /* lambda dependence from changing restraint distances */
2448             if (ddp > 0)
2449             {
2450                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2451             }
2452             else if (ddp < 0)
2453             {
2454                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2455             }
2456             do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2457                        f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2458         }
2459     }
2460     return vtot;
2461 }
2462
2463
2464 real unimplemented(int gmx_unused nbonds,
2465                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2466                    const rvec gmx_unused x[], rvec4 gmx_unused f[], rvec gmx_unused fshift[],
2467                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2468                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2469                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2470                    int gmx_unused *global_atom_index)
2471 {
2472     gmx_impl("*** you are using a not implemented function");
2473 }
2474
2475 real restrangles(int nbonds,
2476                  const t_iatom forceatoms[], const t_iparams forceparams[],
2477                  const rvec x[], rvec4 f[], rvec fshift[],
2478                  const t_pbc *pbc, const t_graph *g,
2479                  real gmx_unused lambda, real gmx_unused *dvdlambda,
2480                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2481                  int gmx_unused *global_atom_index)
2482 {
2483     int    i, d, ai, aj, ak, type, m;
2484     int    t1, t2;
2485     real   v, vtot;
2486     ivec   jt, dt_ij, dt_kj;
2487     rvec   f_i, f_j, f_k;
2488     double prefactor, ratio_ante, ratio_post;
2489     rvec   delta_ante, delta_post, vec_temp;
2490
2491     vtot = 0.0;
2492     for (i = 0; (i < nbonds); )
2493     {
2494         type = forceatoms[i++];
2495         ai   = forceatoms[i++];
2496         aj   = forceatoms[i++];
2497         ak   = forceatoms[i++];
2498
2499         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2500         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2501         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2502
2503
2504         /* This function computes factors needed for restricted angle potential.
2505          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2506          * when three particles align and the dihedral angle and dihedral potential
2507          * cannot be calculated. This potential is calculated using the formula:
2508            real restrangles(int nbonds,
2509             const t_iatom forceatoms[],const t_iparams forceparams[],
2510             const rvec x[],rvec4 f[],rvec fshift[],
2511             const t_pbc *pbc,const t_graph *g,
2512             real gmx_unused lambda,real gmx_unused *dvdlambda,
2513             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2514             int gmx_unused *global_atom_index)
2515            {
2516            int  i, d, ai, aj, ak, type, m;
2517            int t1, t2;
2518            rvec r_ij,r_kj;
2519            real v, vtot;
2520            ivec jt,dt_ij,dt_kj;
2521            rvec f_i, f_j, f_k;
2522            real prefactor, ratio_ante, ratio_post;
2523            rvec delta_ante, delta_post, vec_temp;
2524
2525            vtot = 0.0;
2526            for(i=0; (i<nbonds); )
2527            {
2528            type = forceatoms[i++];
2529            ai   = forceatoms[i++];
2530            aj   = forceatoms[i++];
2531            ak   = forceatoms[i++];
2532
2533          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2534          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2535          * For more explanations see comments file "restcbt.h". */
2536
2537         compute_factors_restangles(type, forceparams,  delta_ante, delta_post,
2538                                    &prefactor, &ratio_ante, &ratio_post, &v);
2539
2540         /*   Forces are computed per component */
2541         for (d = 0; d < DIM; d++)
2542         {
2543             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2544             f_j[d] = prefactor * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2545             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2546         }
2547
2548         /*   Computation of potential energy   */
2549
2550         vtot += v;
2551
2552         /*   Update forces */
2553
2554         for (m = 0; (m < DIM); m++)
2555         {
2556             f[ai][m] += f_i[m];
2557             f[aj][m] += f_j[m];
2558             f[ak][m] += f_k[m];
2559         }
2560
2561         if (g)
2562         {
2563             copy_ivec(SHIFT_IVEC(g, aj), jt);
2564             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2565             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2566             t1 = IVEC2IS(dt_ij);
2567             t2 = IVEC2IS(dt_kj);
2568         }
2569
2570         rvec_inc(fshift[t1], f_i);
2571         rvec_inc(fshift[CENTRAL], f_j);
2572         rvec_inc(fshift[t2], f_k);
2573     }
2574     return vtot;
2575 }
2576
2577
2578 real restrdihs(int nbonds,
2579                const t_iatom forceatoms[], const t_iparams forceparams[],
2580                const rvec x[], rvec4 f[], rvec fshift[],
2581                const t_pbc *pbc, const t_graph *g,
2582                real gmx_unused lambda, real gmx_unused *dvlambda,
2583                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2584                int gmx_unused *global_atom_index)
2585 {
2586     int  i, d, type, ai, aj, ak, al;
2587     rvec f_i, f_j, f_k, f_l;
2588     rvec dx_jl;
2589     ivec jt, dt_ij, dt_kj, dt_lj;
2590     int  t1, t2, t3;
2591     real v, vtot;
2592     rvec delta_ante,  delta_crnt, delta_post, vec_temp;
2593     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2594     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2595     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2596     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2597     real prefactor_phi;
2598
2599
2600     vtot = 0.0;
2601     for (i = 0; (i < nbonds); )
2602     {
2603         type = forceatoms[i++];
2604         ai   = forceatoms[i++];
2605         aj   = forceatoms[i++];
2606         ak   = forceatoms[i++];
2607         al   = forceatoms[i++];
2608
2609         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2610         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2611         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2612         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2613         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2614
2615         /* This function computes factors needed for restricted angle potential.
2616          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2617          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2618          * This potential is calculated using the formula:
2619          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2620          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2621          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2622          * For more explanations see comments file "restcbt.h" */
2623
2624         compute_factors_restrdihs(type, forceparams,
2625                                   delta_ante, delta_crnt, delta_post,
2626                                   &factor_phi_ai_ante, &factor_phi_ai_crnt, &factor_phi_ai_post,
2627                                   &factor_phi_aj_ante, &factor_phi_aj_crnt, &factor_phi_aj_post,
2628                                   &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2629                                   &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post,
2630                                   &prefactor_phi, &v);
2631
2632
2633         /*      Computation of forces per component */
2634         for (d = 0; d < DIM; d++)
2635         {
2636             f_i[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]);
2637             f_j[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]);
2638             f_k[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]);
2639             f_l[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]);
2640         }
2641         /*      Computation of the energy */
2642
2643         vtot += v;
2644
2645
2646
2647         /*    Updating the forces */
2648
2649         rvec_inc(f[ai], f_i);
2650         rvec_inc(f[aj], f_j);
2651         rvec_inc(f[ak], f_k);
2652         rvec_inc(f[al], f_l);
2653
2654
2655         /* Updating the fshift forces for the pressure coupling */
2656         if (g)
2657         {
2658             copy_ivec(SHIFT_IVEC(g, aj), jt);
2659             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2660             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2661             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2662             t1 = IVEC2IS(dt_ij);
2663             t2 = IVEC2IS(dt_kj);
2664             t3 = IVEC2IS(dt_lj);
2665         }
2666         else if (pbc)
2667         {
2668             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2669         }
2670         else
2671         {
2672             t3 = CENTRAL;
2673         }
2674
2675         rvec_inc(fshift[t1], f_i);
2676         rvec_inc(fshift[CENTRAL], f_j);
2677         rvec_inc(fshift[t2], f_k);
2678         rvec_inc(fshift[t3], f_l);
2679
2680     }
2681
2682     return vtot;
2683 }
2684
2685
2686 real cbtdihs(int nbonds,
2687              const t_iatom forceatoms[], const t_iparams forceparams[],
2688              const rvec x[], rvec4 f[], rvec fshift[],
2689              const t_pbc *pbc, const t_graph *g,
2690              real gmx_unused lambda, real gmx_unused *dvdlambda,
2691              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2692              int gmx_unused *global_atom_index)
2693 {
2694     int  type, ai, aj, ak, al, i, d;
2695     int  t1, t2, t3;
2696     real v, vtot;
2697     rvec vec_temp;
2698     rvec f_i, f_j, f_k, f_l;
2699     ivec jt, dt_ij, dt_kj, dt_lj;
2700     rvec dx_jl;
2701     rvec delta_ante, delta_crnt, delta_post;
2702     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2703     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2704     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2705
2706
2707
2708
2709     vtot = 0.0;
2710     for (i = 0; (i < nbonds); )
2711     {
2712         type = forceatoms[i++];
2713         ai   = forceatoms[i++];
2714         aj   = forceatoms[i++];
2715         ak   = forceatoms[i++];
2716         al   = forceatoms[i++];
2717
2718
2719         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2720         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2721         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2722         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2723         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2724         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2725
2726         /* \brief Compute factors for CBT potential
2727          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2728          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2729          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2730          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2731          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2732          * It contains in its expression not only the dihedral angle \f$\phi\f$
2733          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2734          * --- the adjacent bending angles.
2735          * For more explanations see comments file "restcbt.h". */
2736
2737         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post,
2738                                 f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al,
2739                                 f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak,
2740                                 f_theta_post_aj, f_theta_post_ak, f_theta_post_al,
2741                                 &v);
2742
2743
2744         /*      Acumulate the resuts per beads */
2745         for (d = 0; d < DIM; d++)
2746         {
2747             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2748             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2749             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2750             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2751         }
2752
2753         /*      Compute the potential energy */
2754
2755         vtot += v;
2756
2757
2758         /*  Updating the forces */
2759         rvec_inc(f[ai], f_i);
2760         rvec_inc(f[aj], f_j);
2761         rvec_inc(f[ak], f_k);
2762         rvec_inc(f[al], f_l);
2763
2764
2765         /* Updating the fshift forces for the pressure coupling */
2766         if (g)
2767         {
2768             copy_ivec(SHIFT_IVEC(g, aj), jt);
2769             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2770             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2771             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2772             t1 = IVEC2IS(dt_ij);
2773             t2 = IVEC2IS(dt_kj);
2774             t3 = IVEC2IS(dt_lj);
2775         }
2776         else if (pbc)
2777         {
2778             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2779         }
2780         else
2781         {
2782             t3 = CENTRAL;
2783         }
2784
2785         rvec_inc(fshift[t1], f_i);
2786         rvec_inc(fshift[CENTRAL], f_j);
2787         rvec_inc(fshift[t2], f_k);
2788         rvec_inc(fshift[t3], f_l);
2789     }
2790
2791     return vtot;
2792 }
2793
2794 template <BondedKernelFlavor flavor>
2795 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2796 rbdihs(int nbonds,
2797        const t_iatom forceatoms[], const t_iparams forceparams[],
2798        const rvec x[], rvec4 f[], rvec fshift[],
2799        const t_pbc *pbc, const t_graph *g,
2800        real lambda, real *dvdlambda,
2801        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2802        int gmx_unused *global_atom_index)
2803 {
2804     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2805     int        type, ai, aj, ak, al, i, j;
2806     int        t1, t2, t3;
2807     rvec       r_ij, r_kj, r_kl, m, n;
2808     real       parmA[NR_RBDIHS];
2809     real       parmB[NR_RBDIHS];
2810     real       parm[NR_RBDIHS];
2811     real       cos_phi, phi, rbp, rbpBA;
2812     real       v, ddphi, sin_phi;
2813     real       cosfac, vtot;
2814     real       L1        = 1.0-lambda;
2815     real       dvdl_term = 0;
2816
2817     vtot = 0.0;
2818     for (i = 0; (i < nbonds); )
2819     {
2820         type = forceatoms[i++];
2821         ai   = forceatoms[i++];
2822         aj   = forceatoms[i++];
2823         ak   = forceatoms[i++];
2824         al   = forceatoms[i++];
2825
2826         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2827                         &t1, &t2, &t3);  /*  84         */
2828
2829         /* Change to polymer convention */
2830         if (phi < c0)
2831         {
2832             phi += M_PI;
2833         }
2834         else
2835         {
2836             phi -= M_PI;    /*   1              */
2837
2838         }
2839         cos_phi = std::cos(phi);
2840         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2841         sin_phi = std::sin(phi);
2842
2843         for (j = 0; (j < NR_RBDIHS); j++)
2844         {
2845             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2846             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2847             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2848         }
2849         /* Calculate cosine powers */
2850         /* Calculate the energy */
2851         /* Calculate the derivative */
2852
2853         v            = parm[0];
2854         dvdl_term   += (parmB[0]-parmA[0]);
2855         ddphi        = c0;
2856         cosfac       = c1;
2857
2858         rbp          = parm[1];
2859         rbpBA        = parmB[1]-parmA[1];
2860         ddphi       += rbp*cosfac;
2861         cosfac      *= cos_phi;
2862         v           += cosfac*rbp;
2863         dvdl_term   += cosfac*rbpBA;
2864         rbp          = parm[2];
2865         rbpBA        = parmB[2]-parmA[2];
2866         ddphi       += c2*rbp*cosfac;
2867         cosfac      *= cos_phi;
2868         v           += cosfac*rbp;
2869         dvdl_term   += cosfac*rbpBA;
2870         rbp          = parm[3];
2871         rbpBA        = parmB[3]-parmA[3];
2872         ddphi       += c3*rbp*cosfac;
2873         cosfac      *= cos_phi;
2874         v           += cosfac*rbp;
2875         dvdl_term   += cosfac*rbpBA;
2876         rbp          = parm[4];
2877         rbpBA        = parmB[4]-parmA[4];
2878         ddphi       += c4*rbp*cosfac;
2879         cosfac      *= cos_phi;
2880         v           += cosfac*rbp;
2881         dvdl_term   += cosfac*rbpBA;
2882         rbp          = parm[5];
2883         rbpBA        = parmB[5]-parmA[5];
2884         ddphi       += c5*rbp*cosfac;
2885         cosfac      *= cos_phi;
2886         v           += cosfac*rbp;
2887         dvdl_term   += cosfac*rbpBA;
2888
2889         ddphi = -ddphi*sin_phi;         /*  11          */
2890
2891         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2892                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2893         vtot += v;
2894     }
2895     *dvdlambda += dvdl_term;
2896
2897     return vtot;
2898 }
2899
2900 //! \endcond
2901
2902 /*! \brief Mysterious undocumented function */
2903 int
2904 cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2905 {
2906     int im1, ip1, ip2;
2907
2908     if (ip < 0)
2909     {
2910         ip = ip + grid_spacing - 1;
2911     }
2912     else if (ip > grid_spacing)
2913     {
2914         ip = ip - grid_spacing - 1;
2915     }
2916
2917     im1 = ip - 1;
2918     ip1 = ip + 1;
2919     ip2 = ip + 2;
2920
2921     if (ip == 0)
2922     {
2923         im1 = grid_spacing - 1;
2924     }
2925     else if (ip == grid_spacing-2)
2926     {
2927         ip2 = 0;
2928     }
2929     else if (ip == grid_spacing-1)
2930     {
2931         ip1 = 0;
2932         ip2 = 1;
2933     }
2934
2935     *ipm1 = im1;
2936     *ipp1 = ip1;
2937     *ipp2 = ip2;
2938
2939     return ip;
2940
2941 }
2942
2943 } // namespace
2944
2945 real
2946 cmap_dihs(int nbonds,
2947           const t_iatom forceatoms[], const t_iparams forceparams[],
2948           const gmx_cmap_t *cmap_grid,
2949           const rvec x[], rvec4 f[], rvec fshift[],
2950           const struct t_pbc *pbc, const struct t_graph *g,
2951           real gmx_unused lambda, real gmx_unused *dvdlambda,
2952           const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2953           int  gmx_unused *global_atom_index)
2954 {
2955     int         i, n;
2956     int         ai, aj, ak, al, am;
2957     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2958     int         type;
2959     int         t11, t21, t31, t12, t22, t32;
2960     int         iphi1, ip1m1, ip1p1, ip1p2;
2961     int         iphi2, ip2m1, ip2p1, ip2p2;
2962     int         l1, l2, l3;
2963     int         pos1, pos2, pos3, pos4;
2964
2965     real        ty[4], ty1[4], ty2[4], ty12[4], tx[16];
2966     real        phi1, cos_phi1, sin_phi1, xphi1;
2967     real        phi2, cos_phi2, sin_phi2, xphi2;
2968     real        dx, tt, tu, e, df1, df2, vtot;
2969     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2970     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2971     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
2972     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
2973     real        fac;
2974
2975     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
2976     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
2977     rvec        f1_i, f1_j, f1_k, f1_l;
2978     rvec        f2_i, f2_j, f2_k, f2_l;
2979     rvec        a1, b1, a2, b2;
2980     rvec        f1, g1, h1, f2, g2, h2;
2981     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2982     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
2983     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
2984
2985     int         loop_index[4][4] = {
2986         {0, 4, 8, 12},
2987         {1, 5, 9, 13},
2988         {2, 6, 10, 14},
2989         {3, 7, 11, 15}
2990     };
2991
2992     /* Total CMAP energy */
2993     vtot = 0;
2994
2995     for (n = 0; n < nbonds; )
2996     {
2997         /* Five atoms are involved in the two torsions */
2998         type   = forceatoms[n++];
2999         ai     = forceatoms[n++];
3000         aj     = forceatoms[n++];
3001         ak     = forceatoms[n++];
3002         al     = forceatoms[n++];
3003         am     = forceatoms[n++];
3004
3005         /* Which CMAP type is this */
3006         const int   cmapA = forceparams[type].cmap.cmapA;
3007         const real *cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
3008
3009         /* First torsion */
3010         a1i   = ai;
3011         a1j   = aj;
3012         a1k   = ak;
3013         a1l   = al;
3014
3015         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
3016                           &t11, &t21, &t31);  /* 84 */
3017
3018         cos_phi1 = std::cos(phi1);
3019
3020         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
3021         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
3022         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
3023
3024         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
3025         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
3026         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
3027
3028         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3029
3030         ra21  = iprod(a1, a1);       /* 5 */
3031         rb21  = iprod(b1, b1);       /* 5 */
3032         rg21  = iprod(r1_kj, r1_kj); /* 5 */
3033         rg1   = sqrt(rg21);
3034
3035         rgr1  = 1.0/rg1;
3036         ra2r1 = 1.0/ra21;
3037         rb2r1 = 1.0/rb21;
3038         rabr1 = sqrt(ra2r1*rb2r1);
3039
3040         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3041
3042         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3043         {
3044             phi1 = std::asin(sin_phi1);
3045
3046             if (cos_phi1 < 0)
3047             {
3048                 if (phi1 > 0)
3049                 {
3050                     phi1 = M_PI - phi1;
3051                 }
3052                 else
3053                 {
3054                     phi1 = -M_PI - phi1;
3055                 }
3056             }
3057         }
3058         else
3059         {
3060             phi1 = std::acos(cos_phi1);
3061
3062             if (sin_phi1 < 0)
3063             {
3064                 phi1 = -phi1;
3065             }
3066         }
3067
3068         xphi1 = phi1 + M_PI; /* 1 */
3069
3070         /* Second torsion */
3071         a2i   = aj;
3072         a2j   = ak;
3073         a2k   = al;
3074         a2l   = am;
3075
3076         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
3077                           &t12, &t22, &t32); /* 84 */
3078
3079         cos_phi2 = std::cos(phi2);
3080
3081         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
3082         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
3083         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
3084
3085         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
3086         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
3087         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
3088
3089         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3090
3091         ra22  = iprod(a2, a2);         /* 5 */
3092         rb22  = iprod(b2, b2);         /* 5 */
3093         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
3094         rg2   = sqrt(rg22);
3095
3096         rgr2  = 1.0/rg2;
3097         ra2r2 = 1.0/ra22;
3098         rb2r2 = 1.0/rb22;
3099         rabr2 = sqrt(ra2r2*rb2r2);
3100
3101         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3102
3103         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3104         {
3105             phi2 = std::asin(sin_phi2);
3106
3107             if (cos_phi2 < 0)
3108             {
3109                 if (phi2 > 0)
3110                 {
3111                     phi2 = M_PI - phi2;
3112                 }
3113                 else
3114                 {
3115                     phi2 = -M_PI - phi2;
3116                 }
3117             }
3118         }
3119         else
3120         {
3121             phi2 = std::acos(cos_phi2);
3122
3123             if (sin_phi2 < 0)
3124             {
3125                 phi2 = -phi2;
3126             }
3127         }
3128
3129         xphi2 = phi2 + M_PI; /* 1 */
3130
3131         /* Range mangling */
3132         if (xphi1 < 0)
3133         {
3134             xphi1 = xphi1 + 2*M_PI;
3135         }
3136         else if (xphi1 >= 2*M_PI)
3137         {
3138             xphi1 = xphi1 - 2*M_PI;
3139         }
3140
3141         if (xphi2 < 0)
3142         {
3143             xphi2 = xphi2 + 2*M_PI;
3144         }
3145         else if (xphi2 >= 2*M_PI)
3146         {
3147             xphi2 = xphi2 - 2*M_PI;
3148         }
3149
3150         /* Number of grid points */
3151         dx = 2*M_PI / cmap_grid->grid_spacing;
3152
3153         /* Where on the grid are we */
3154         iphi1 = static_cast<int>(xphi1/dx);
3155         iphi2 = static_cast<int>(xphi2/dx);
3156
3157         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3158         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3159
3160         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3161         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3162         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3163         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3164
3165         ty[0]   = cmapd[pos1*4];
3166         ty[1]   = cmapd[pos2*4];
3167         ty[2]   = cmapd[pos3*4];
3168         ty[3]   = cmapd[pos4*4];
3169
3170         ty1[0]   = cmapd[pos1*4+1];
3171         ty1[1]   = cmapd[pos2*4+1];
3172         ty1[2]   = cmapd[pos3*4+1];
3173         ty1[3]   = cmapd[pos4*4+1];
3174
3175         ty2[0]   = cmapd[pos1*4+2];
3176         ty2[1]   = cmapd[pos2*4+2];
3177         ty2[2]   = cmapd[pos3*4+2];
3178         ty2[3]   = cmapd[pos4*4+2];
3179
3180         ty12[0]   = cmapd[pos1*4+3];
3181         ty12[1]   = cmapd[pos2*4+3];
3182         ty12[2]   = cmapd[pos3*4+3];
3183         ty12[3]   = cmapd[pos4*4+3];
3184
3185         /* Switch to degrees */
3186         dx    = 360.0 / cmap_grid->grid_spacing;
3187         xphi1 = xphi1 * RAD2DEG;
3188         xphi2 = xphi2 * RAD2DEG;
3189
3190         for (i = 0; i < 4; i++) /* 16 */
3191         {
3192             tx[i]    = ty[i];
3193             tx[i+4]  = ty1[i]*dx;
3194             tx[i+8]  = ty2[i]*dx;
3195             tx[i+12] = ty12[i]*dx*dx;
3196         }
3197
3198         real tc[16] = {0};
3199         for (int idx = 0; idx < 16; idx++) /* 1056 */
3200         {
3201             for (int k = 0; k < 16; k++)
3202             {
3203                 tc[idx] += cmap_coeff_matrix[k*16+idx]*tx[k];
3204             }
3205         }
3206
3207         tt    = (xphi1-iphi1*dx)/dx;
3208         tu    = (xphi2-iphi2*dx)/dx;
3209
3210         e     = 0;
3211         df1   = 0;
3212         df2   = 0;
3213
3214         for (i = 3; i >= 0; i--)
3215         {
3216             l1 = loop_index[i][3];
3217             l2 = loop_index[i][2];
3218             l3 = loop_index[i][1];
3219
3220             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3221             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3222             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3223         }
3224
3225         fac     = RAD2DEG/dx;
3226         df1     = df1   * fac;
3227         df2     = df2   * fac;
3228
3229         /* CMAP energy */
3230         vtot += e;
3231
3232         /* Do forces - first torsion */
3233         fg1       = iprod(r1_ij, r1_kj);
3234         hg1       = iprod(r1_kl, r1_kj);
3235         fga1      = fg1*ra2r1*rgr1;
3236         hgb1      = hg1*rb2r1*rgr1;
3237         gaa1      = -ra2r1*rg1;
3238         gbb1      = rb2r1*rg1;
3239
3240         for (i = 0; i < DIM; i++)
3241         {
3242             dtf1[i]   = gaa1 * a1[i];
3243             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3244             dth1[i]   = gbb1 * b1[i];
3245
3246             f1[i]     = df1  * dtf1[i];
3247             g1[i]     = df1  * dtg1[i];
3248             h1[i]     = df1  * dth1[i];
3249
3250             f1_i[i]   =  f1[i];
3251             f1_j[i]   = -f1[i] - g1[i];
3252             f1_k[i]   =  h1[i] + g1[i];
3253             f1_l[i]   = -h1[i];
3254
3255             f[a1i][i] = f[a1i][i] + f1_i[i];
3256             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3257             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3258             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3259         }
3260
3261         /* Do forces - second torsion */
3262         fg2       = iprod(r2_ij, r2_kj);
3263         hg2       = iprod(r2_kl, r2_kj);
3264         fga2      = fg2*ra2r2*rgr2;
3265         hgb2      = hg2*rb2r2*rgr2;
3266         gaa2      = -ra2r2*rg2;
3267         gbb2      = rb2r2*rg2;
3268
3269         for (i = 0; i < DIM; i++)
3270         {
3271             dtf2[i]   = gaa2 * a2[i];
3272             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3273             dth2[i]   = gbb2 * b2[i];
3274
3275             f2[i]     = df2  * dtf2[i];
3276             g2[i]     = df2  * dtg2[i];
3277             h2[i]     = df2  * dth2[i];
3278
3279             f2_i[i]   =  f2[i];
3280             f2_j[i]   = -f2[i] - g2[i];
3281             f2_k[i]   =  h2[i] + g2[i];
3282             f2_l[i]   = -h2[i];
3283
3284             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3285             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3286             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3287             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3288         }
3289
3290         /* Shift forces */
3291         if (g)
3292         {
3293             copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3294             ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3295             ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3296             ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3297             t11 = IVEC2IS(dt1_ij);
3298             t21 = IVEC2IS(dt1_kj);
3299             t31 = IVEC2IS(dt1_lj);
3300
3301             copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3302             ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3303             ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3304             ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3305             t12 = IVEC2IS(dt2_ij);
3306             t22 = IVEC2IS(dt2_kj);
3307             t32 = IVEC2IS(dt2_lj);
3308         }
3309         else if (pbc)
3310         {
3311             t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3312             t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3313         }
3314         else
3315         {
3316             t31 = CENTRAL;
3317             t32 = CENTRAL;
3318         }
3319
3320         rvec_inc(fshift[t11], f1_i);
3321         rvec_inc(fshift[CENTRAL], f1_j);
3322         rvec_inc(fshift[t21], f1_k);
3323         rvec_inc(fshift[t31], f1_l);
3324
3325         rvec_inc(fshift[t21], f2_i);
3326         rvec_inc(fshift[CENTRAL], f2_j);
3327         rvec_inc(fshift[t22], f2_k);
3328         rvec_inc(fshift[t32], f2_l);
3329     }
3330     return vtot;
3331 }
3332
3333 namespace
3334 {
3335
3336 //! \cond
3337 /***********************************************************
3338  *
3339  *   G R O M O S  9 6   F U N C T I O N S
3340  *
3341  ***********************************************************/
3342 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3343                  real *V, real *F)
3344 {
3345     const real half = 0.5;
3346     real       L1, kk, x0, dx, dx2;
3347     real       v, f, dvdlambda;
3348
3349     L1    = 1.0-lambda;
3350     kk    = L1*kA+lambda*kB;
3351     x0    = L1*xA+lambda*xB;
3352
3353     dx    = x-x0;
3354     dx2   = dx*dx;
3355
3356     f          = -kk*dx;
3357     v          = half*kk*dx2;
3358     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3359
3360     *F    = f;
3361     *V    = v;
3362
3363     return dvdlambda;
3364
3365     /* That was 21 flops */
3366 }
3367
3368 real g96bonds(int nbonds,
3369               const t_iatom forceatoms[], const t_iparams forceparams[],
3370               const rvec x[], rvec4 f[], rvec fshift[],
3371               const t_pbc *pbc, const t_graph *g,
3372               real lambda, real *dvdlambda,
3373               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3374               int gmx_unused *global_atom_index)
3375 {
3376     int  i, m, ki, ai, aj, type;
3377     real dr2, fbond, vbond, fij, vtot;
3378     rvec dx;
3379     ivec dt;
3380
3381     vtot = 0.0;
3382     for (i = 0; (i < nbonds); )
3383     {
3384         type = forceatoms[i++];
3385         ai   = forceatoms[i++];
3386         aj   = forceatoms[i++];
3387
3388         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3389         dr2  = iprod(dx, dx);                       /*   5              */
3390
3391         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3392                                   forceparams[type].harmonic.krB,
3393                                   forceparams[type].harmonic.rA,
3394                                   forceparams[type].harmonic.rB,
3395                                   dr2, lambda, &vbond, &fbond);
3396
3397         vtot  += 0.5*vbond;                         /* 1*/
3398
3399         if (g)
3400         {
3401             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3402             ki = IVEC2IS(dt);
3403         }
3404         for (m = 0; (m < DIM); m++)     /*  15          */
3405         {
3406             fij                 = fbond*dx[m];
3407             f[ai][m]           += fij;
3408             f[aj][m]           -= fij;
3409             fshift[ki][m]      += fij;
3410             fshift[CENTRAL][m] -= fij;
3411         }
3412     }               /* 44 TOTAL */
3413     return vtot;
3414 }
3415
3416 /*! \brief Returns the cosine of the angle between the bonds i-j and j-k */
3417 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3418                    rvec r_ij, rvec r_kj,
3419                    int *t1, int *t2)
3420 {
3421     real costh;
3422
3423     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3424     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3425
3426     costh = cos_angle(r_ij, r_kj);         /* 25                */
3427     /* 41 TOTAL */
3428     return costh;
3429 }
3430
3431 real g96angles(int nbonds,
3432                const t_iatom forceatoms[], const t_iparams forceparams[],
3433                const rvec x[], rvec4 f[], rvec fshift[],
3434                const t_pbc *pbc, const t_graph *g,
3435                real lambda, real *dvdlambda,
3436                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3437                int gmx_unused *global_atom_index)
3438 {
3439     int  i, ai, aj, ak, type, m, t1, t2;
3440     rvec r_ij, r_kj;
3441     real cos_theta, dVdt, va, vtot;
3442     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3443     rvec f_i, f_j, f_k;
3444     ivec jt, dt_ij, dt_kj;
3445
3446     vtot = 0.0;
3447     for (i = 0; (i < nbonds); )
3448     {
3449         type = forceatoms[i++];
3450         ai   = forceatoms[i++];
3451         aj   = forceatoms[i++];
3452         ak   = forceatoms[i++];
3453
3454         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3455
3456         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3457                                   forceparams[type].harmonic.krB,
3458                                   forceparams[type].harmonic.rA,
3459                                   forceparams[type].harmonic.rB,
3460                                   cos_theta, lambda, &va, &dVdt);
3461         vtot    += va;
3462
3463         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3464         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3465         rij_2    = rij_1*rij_1;
3466         rkj_2    = rkj_1*rkj_1;
3467         rijrkj_1 = rij_1*rkj_1;         /* 23 */
3468
3469         for (m = 0; (m < DIM); m++)     /*  42  */
3470         {
3471             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3472             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3473             f_j[m]    = -f_i[m]-f_k[m];
3474             f[ai][m] += f_i[m];
3475             f[aj][m] += f_j[m];
3476             f[ak][m] += f_k[m];
3477         }
3478
3479         if (g)
3480         {
3481             copy_ivec(SHIFT_IVEC(g, aj), jt);
3482
3483             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3484             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3485             t1 = IVEC2IS(dt_ij);
3486             t2 = IVEC2IS(dt_kj);
3487         }
3488         rvec_inc(fshift[t1], f_i);
3489         rvec_inc(fshift[CENTRAL], f_j);
3490         rvec_inc(fshift[t2], f_k);          /* 9 */
3491         /* 163 TOTAL    */
3492     }
3493     return vtot;
3494 }
3495
3496 real cross_bond_bond(int nbonds,
3497                      const t_iatom forceatoms[], const t_iparams forceparams[],
3498                      const rvec x[], rvec4 f[], rvec fshift[],
3499                      const t_pbc *pbc, const t_graph *g,
3500                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3501                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3502                      int gmx_unused *global_atom_index)
3503 {
3504     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3505      * pp. 842-847
3506      */
3507     int  i, ai, aj, ak, type, m, t1, t2;
3508     rvec r_ij, r_kj;
3509     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3510     rvec f_i, f_j, f_k;
3511     ivec jt, dt_ij, dt_kj;
3512
3513     vtot = 0.0;
3514     for (i = 0; (i < nbonds); )
3515     {
3516         type = forceatoms[i++];
3517         ai   = forceatoms[i++];
3518         aj   = forceatoms[i++];
3519         ak   = forceatoms[i++];
3520         r1e  = forceparams[type].cross_bb.r1e;
3521         r2e  = forceparams[type].cross_bb.r2e;
3522         krr  = forceparams[type].cross_bb.krr;
3523
3524         /* Compute distance vectors ... */
3525         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3526         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3527
3528         /* ... and their lengths */
3529         r1 = norm(r_ij);
3530         r2 = norm(r_kj);
3531
3532         /* Deviations from ideality */
3533         s1 = r1-r1e;
3534         s2 = r2-r2e;
3535
3536         /* Energy (can be negative!) */
3537         vrr   = krr*s1*s2;
3538         vtot += vrr;
3539
3540         /* Forces */
3541         svmul(-krr*s2/r1, r_ij, f_i);
3542         svmul(-krr*s1/r2, r_kj, f_k);
3543
3544         for (m = 0; (m < DIM); m++)     /*  12  */
3545         {
3546             f_j[m]    = -f_i[m] - f_k[m];
3547             f[ai][m] += f_i[m];
3548             f[aj][m] += f_j[m];
3549             f[ak][m] += f_k[m];
3550         }
3551
3552         /* Virial stuff */
3553         if (g)
3554         {
3555             copy_ivec(SHIFT_IVEC(g, aj), jt);
3556
3557             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3558             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3559             t1 = IVEC2IS(dt_ij);
3560             t2 = IVEC2IS(dt_kj);
3561         }
3562         rvec_inc(fshift[t1], f_i);
3563         rvec_inc(fshift[CENTRAL], f_j);
3564         rvec_inc(fshift[t2], f_k);          /* 9 */
3565         /* 163 TOTAL    */
3566     }
3567     return vtot;
3568 }
3569
3570 real cross_bond_angle(int nbonds,
3571                       const t_iatom forceatoms[], const t_iparams forceparams[],
3572                       const rvec x[], rvec4 f[], rvec fshift[],
3573                       const t_pbc *pbc, const t_graph *g,
3574                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3575                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3576                       int gmx_unused *global_atom_index)
3577 {
3578     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3579      * pp. 842-847
3580      */
3581     int  i, ai, aj, ak, type, m, t1, t2;
3582     rvec r_ij, r_kj, r_ik;
3583     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3584     rvec f_i, f_j, f_k;
3585     ivec jt, dt_ij, dt_kj;
3586
3587     vtot = 0.0;
3588     for (i = 0; (i < nbonds); )
3589     {
3590         type = forceatoms[i++];
3591         ai   = forceatoms[i++];
3592         aj   = forceatoms[i++];
3593         ak   = forceatoms[i++];
3594         r1e  = forceparams[type].cross_ba.r1e;
3595         r2e  = forceparams[type].cross_ba.r2e;
3596         r3e  = forceparams[type].cross_ba.r3e;
3597         krt  = forceparams[type].cross_ba.krt;
3598
3599         /* Compute distance vectors ... */
3600         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3601         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3602         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3603
3604         /* ... and their lengths */
3605         r1 = norm(r_ij);
3606         r2 = norm(r_kj);
3607         r3 = norm(r_ik);
3608
3609         /* Deviations from ideality */
3610         s1 = r1-r1e;
3611         s2 = r2-r2e;
3612         s3 = r3-r3e;
3613
3614         /* Energy (can be negative!) */
3615         vrt   = krt*s3*(s1+s2);
3616         vtot += vrt;
3617
3618         /* Forces */
3619         k1 = -krt*(s3/r1);
3620         k2 = -krt*(s3/r2);
3621         k3 = -krt*(s1+s2)/r3;
3622         for (m = 0; (m < DIM); m++)
3623         {
3624             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3625             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3626             f_j[m] = -f_i[m] - f_k[m];
3627         }
3628
3629         for (m = 0; (m < DIM); m++)     /*  12  */
3630         {
3631             f[ai][m] += f_i[m];
3632             f[aj][m] += f_j[m];
3633             f[ak][m] += f_k[m];
3634         }
3635
3636         /* Virial stuff */
3637         if (g)
3638         {
3639             copy_ivec(SHIFT_IVEC(g, aj), jt);
3640
3641             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3642             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3643             t1 = IVEC2IS(dt_ij);
3644             t2 = IVEC2IS(dt_kj);
3645         }
3646         rvec_inc(fshift[t1], f_i);
3647         rvec_inc(fshift[CENTRAL], f_j);
3648         rvec_inc(fshift[t2], f_k);          /* 9 */
3649         /* 163 TOTAL    */
3650     }
3651     return vtot;
3652 }
3653
3654 /*! \brief Computes the potential and force for a tabulated potential */
3655 real bonded_tab(const char *type, int table_nr,
3656                 const bondedtable_t *table, real kA, real kB, real r,
3657                 real lambda, real *V, real *F)
3658 {
3659     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3660     int  n0, nnn;
3661     real dvdlambda;
3662
3663     k = (1.0 - lambda)*kA + lambda*kB;
3664
3665     tabscale = table->scale;
3666     VFtab    = table->data;
3667
3668     rt    = r*tabscale;
3669     n0    = static_cast<int>(rt);
3670     if (n0 >= table->n)
3671     {
3672         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3673                   type, table_nr, r, n0, n0+1, table->n);
3674     }
3675     eps   = rt - n0;
3676     eps2  = eps*eps;
3677     nnn   = 4*n0;
3678     Yt    = VFtab[nnn];
3679     Ft    = VFtab[nnn+1];
3680     Geps  = VFtab[nnn+2]*eps;
3681     Heps2 = VFtab[nnn+3]*eps2;
3682     Fp    = Ft + Geps + Heps2;
3683     VV    = Yt + Fp*eps;
3684     FF    = Fp + Geps + 2.0*Heps2;
3685
3686     *F         = -k*FF*tabscale;
3687     *V         = k*VV;
3688     dvdlambda  = (kB - kA)*VV;
3689
3690     return dvdlambda;
3691
3692     /* That was 22 flops */
3693 }
3694
3695 real tab_bonds(int nbonds,
3696                const t_iatom forceatoms[], const t_iparams forceparams[],
3697                const rvec x[], rvec4 f[], rvec fshift[],
3698                const t_pbc *pbc, const t_graph *g,
3699                real lambda, real *dvdlambda,
3700                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3701                int gmx_unused  *global_atom_index)
3702 {
3703     int  i, m, ki, ai, aj, type, table;
3704     real dr, dr2, fbond, vbond, fij, vtot;
3705     rvec dx;
3706     ivec dt;
3707
3708     vtot = 0.0;
3709     for (i = 0; (i < nbonds); )
3710     {
3711         type = forceatoms[i++];
3712         ai   = forceatoms[i++];
3713         aj   = forceatoms[i++];
3714
3715         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3716         dr2  = iprod(dx, dx);                       /*   5              */
3717         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
3718
3719         table = forceparams[type].tab.table;
3720
3721         *dvdlambda += bonded_tab("bond", table,
3722                                  &fcd->bondtab[table],
3723                                  forceparams[type].tab.kA,
3724                                  forceparams[type].tab.kB,
3725                                  dr, lambda, &vbond, &fbond); /*  22 */
3726
3727         if (dr2 == 0.0)
3728         {
3729             continue;
3730         }
3731
3732
3733         vtot  += vbond;             /* 1*/
3734         fbond *= gmx::invsqrt(dr2); /*   6              */
3735         if (g)
3736         {
3737             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3738             ki = IVEC2IS(dt);
3739         }
3740         for (m = 0; (m < DIM); m++)     /*  15          */
3741         {
3742             fij                 = fbond*dx[m];
3743             f[ai][m]           += fij;
3744             f[aj][m]           -= fij;
3745             fshift[ki][m]      += fij;
3746             fshift[CENTRAL][m] -= fij;
3747         }
3748     }               /* 62 TOTAL */
3749     return vtot;
3750 }
3751
3752 real tab_angles(int nbonds,
3753                 const t_iatom forceatoms[], const t_iparams forceparams[],
3754                 const rvec x[], rvec4 f[], rvec fshift[],
3755                 const t_pbc *pbc, const t_graph *g,
3756                 real lambda, real *dvdlambda,
3757                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3758                 int gmx_unused *global_atom_index)
3759 {
3760     int  i, ai, aj, ak, t1, t2, type, table;
3761     rvec r_ij, r_kj;
3762     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3763     ivec jt, dt_ij, dt_kj;
3764
3765     vtot = 0.0;
3766     for (i = 0; (i < nbonds); )
3767     {
3768         type = forceatoms[i++];
3769         ai   = forceatoms[i++];
3770         aj   = forceatoms[i++];
3771         ak   = forceatoms[i++];
3772
3773         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3774                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3775
3776         table = forceparams[type].tab.table;
3777
3778         *dvdlambda += bonded_tab("angle", table,
3779                                  &fcd->angletab[table],
3780                                  forceparams[type].tab.kA,
3781                                  forceparams[type].tab.kB,
3782                                  theta, lambda, &va, &dVdt); /*  22  */
3783         vtot += va;
3784
3785         cos_theta2 = gmx::square(cos_theta);            /*   1          */
3786         if (cos_theta2 < 1)
3787         {
3788             int  m;
3789             real st, sth;
3790             real cik, cii, ckk;
3791             real nrkj2, nrij2;
3792             rvec f_i, f_j, f_k;
3793
3794             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
3795             sth   = st*cos_theta;                      /*   1           */
3796             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
3797             nrij2 = iprod(r_ij, r_ij);
3798
3799             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
3800             cii = sth/nrij2;                    /*  10          */
3801             ckk = sth/nrkj2;                    /*  10          */
3802
3803             for (m = 0; (m < DIM); m++)         /*  39          */
3804             {
3805                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3806                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3807                 f_j[m]    = -f_i[m]-f_k[m];
3808                 f[ai][m] += f_i[m];
3809                 f[aj][m] += f_j[m];
3810                 f[ak][m] += f_k[m];
3811             }
3812             if (g)
3813             {
3814                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3815
3816                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3817                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3818                 t1 = IVEC2IS(dt_ij);
3819                 t2 = IVEC2IS(dt_kj);
3820             }
3821             rvec_inc(fshift[t1], f_i);
3822             rvec_inc(fshift[CENTRAL], f_j);
3823             rvec_inc(fshift[t2], f_k);
3824         }                                       /* 169 TOTAL    */
3825     }
3826     return vtot;
3827 }
3828
3829 real tab_dihs(int nbonds,
3830               const t_iatom forceatoms[], const t_iparams forceparams[],
3831               const rvec x[], rvec4 f[], rvec fshift[],
3832               const t_pbc *pbc, const t_graph *g,
3833               real lambda, real *dvdlambda,
3834               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3835               int gmx_unused *global_atom_index)
3836 {
3837     int  i, type, ai, aj, ak, al, table;
3838     int  t1, t2, t3;
3839     rvec r_ij, r_kj, r_kl, m, n;
3840     real phi, ddphi, vpd, vtot;
3841
3842     vtot = 0.0;
3843     for (i = 0; (i < nbonds); )
3844     {
3845         type = forceatoms[i++];
3846         ai   = forceatoms[i++];
3847         aj   = forceatoms[i++];
3848         ak   = forceatoms[i++];
3849         al   = forceatoms[i++];
3850
3851         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3852                         &t1, &t2, &t3);  /*  84  */
3853
3854         table = forceparams[type].tab.table;
3855
3856         /* Hopefully phi+M_PI never results in values < 0 */
3857         *dvdlambda += bonded_tab("dihedral", table,
3858                                  &fcd->dihtab[table],
3859                                  forceparams[type].tab.kA,
3860                                  forceparams[type].tab.kB,
3861                                  phi+M_PI, lambda, &vpd, &ddphi);
3862
3863         vtot += vpd;
3864         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3865                    f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3866
3867     }                                                 /* 227 TOTAL  */
3868
3869     return vtot;
3870 }
3871
3872 struct BondedInteractions
3873 {
3874     BondedFunction function;
3875     int            nrnbIndex;
3876 };
3877
3878 /*! \brief Lookup table of bonded interaction functions
3879  *
3880  * This must have as many entries as interaction_function in ifunc.cpp */
3881 template<BondedKernelFlavor flavor>
3882 const std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions
3883     = {
3884     BondedInteractions {bonds, eNR_BONDS },                       // F_BONDS
3885     BondedInteractions {g96bonds, eNR_BONDS },                    // F_G96BONDS
3886     BondedInteractions {morse_bonds, eNR_MORSE },                 // F_MORSE
3887     BondedInteractions {cubic_bonds, eNR_CUBICBONDS },            // F_CUBICBONDS
3888     BondedInteractions {unimplemented, -1 },                      // F_CONNBONDS
3889     BondedInteractions {bonds, eNR_BONDS },                       // F_HARMONIC
3890     BondedInteractions {FENE_bonds, eNR_FENEBONDS },              // F_FENEBONDS
3891     BondedInteractions {tab_bonds, eNR_TABBONDS },                // F_TABBONDS
3892     BondedInteractions {tab_bonds, eNR_TABBONDS },                // F_TABBONDSNC
3893     BondedInteractions {restraint_bonds, eNR_RESTRBONDS },        // F_RESTRBONDS
3894     BondedInteractions {angles<flavor>, eNR_ANGLES },             // F_ANGLES
3895     BondedInteractions {g96angles, eNR_ANGLES },                  // F_G96ANGLES
3896     BondedInteractions {restrangles, eNR_ANGLES },                // F_RESTRANGLES
3897     BondedInteractions {linear_angles, eNR_ANGLES },              // F_LINEAR_ANGLES
3898     BondedInteractions {cross_bond_bond, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3899     BondedInteractions {cross_bond_angle, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3900     BondedInteractions {urey_bradley<flavor>, eNR_UREY_BRADLEY }, // F_UREY_BRADLEY
3901     BondedInteractions {quartic_angles, eNR_QANGLES },            // F_QUARTIC_ANGLES
3902     BondedInteractions {tab_angles, eNR_TABANGLES },              // F_TABANGLES
3903     BondedInteractions {pdihs<flavor>, eNR_PROPER },              // F_PDIHS
3904     BondedInteractions {rbdihs<flavor>, eNR_RB },                 // F_RBDIHS
3905     BondedInteractions {restrdihs, eNR_PROPER },                  // F_RESTRDIHS
3906     BondedInteractions {cbtdihs, eNR_RB },                        // F_CBTDIHS
3907     BondedInteractions {rbdihs<flavor>, eNR_FOURDIH },            // F_FOURDIHS
3908     BondedInteractions {idihs, eNR_IMPROPER },                    // F_IDIHS
3909     BondedInteractions {pdihs<flavor>, eNR_IMPROPER },            // F_PIDIHS
3910     BondedInteractions {tab_dihs, eNR_TABDIHS },                  // F_TABDIHS
3911     BondedInteractions {unimplemented, eNR_CMAP },                // F_CMAP
3912     BondedInteractions {unimplemented, -1 },                      // F_GB12_NOLONGERUSED
3913     BondedInteractions {unimplemented, -1 },                      // F_GB13_NOLONGERUSED
3914     BondedInteractions {unimplemented, -1 },                      // F_GB14_NOLONGERUSED
3915     BondedInteractions {unimplemented, -1 },                      // F_GBPOL_NOLONGERUSED
3916     BondedInteractions {unimplemented, -1 },                      // F_NPSOLVATION_NOLONGERUSED
3917     BondedInteractions {unimplemented, eNR_NB14 },                // F_LJ14
3918     BondedInteractions {unimplemented, -1 },                      // F_COUL14
3919     BondedInteractions {unimplemented, eNR_NB14 },                // F_LJC14_Q
3920     BondedInteractions {unimplemented, eNR_NB14 },                // F_LJC_PAIRS_NB
3921     BondedInteractions {unimplemented, -1 },                      // F_LJ
3922     BondedInteractions {unimplemented, -1 },                      // F_BHAM
3923     BondedInteractions {unimplemented, -1 },                      // F_LJ_LR_NOLONGERUSED
3924     BondedInteractions {unimplemented, -1 },                      // F_BHAM_LR_NOLONGERUSED
3925     BondedInteractions {unimplemented, -1 },                      // F_DISPCORR
3926     BondedInteractions {unimplemented, -1 },                      // F_COUL_SR
3927     BondedInteractions {unimplemented, -1 },                      // F_COUL_LR_NOLONGERUSED
3928     BondedInteractions {unimplemented, -1 },                      // F_RF_EXCL
3929     BondedInteractions {unimplemented, -1 },                      // F_COUL_RECIP
3930     BondedInteractions {unimplemented, -1 },                      // F_LJ_RECIP
3931     BondedInteractions {unimplemented, -1 },                      // F_DPD
3932     BondedInteractions {polarize, eNR_POLARIZE },                 // F_POLARIZATION
3933     BondedInteractions {water_pol, eNR_WPOL },                    // F_WATER_POL
3934     BondedInteractions {thole_pol, eNR_THOLE },                   // F_THOLE_POL
3935     BondedInteractions {anharm_polarize, eNR_ANHARM_POL },        // F_ANHARM_POL
3936     BondedInteractions {unimplemented, -1 },                      // F_POSRES
3937     BondedInteractions {unimplemented, -1 },                      // F_FBPOSRES
3938     BondedInteractions {ta_disres, eNR_DISRES },                  // F_DISRES
3939     BondedInteractions {unimplemented, -1 },                      // F_DISRESVIOL
3940     BondedInteractions {orires, eNR_ORIRES },                     // F_ORIRES
3941     BondedInteractions {unimplemented, -1 },                      // F_ORIRESDEV
3942     BondedInteractions {angres, eNR_ANGRES },                     // F_ANGRES
3943     BondedInteractions {angresz, eNR_ANGRESZ },                   // F_ANGRESZ
3944     BondedInteractions {dihres, eNR_DIHRES },                     // F_DIHRES
3945     BondedInteractions {unimplemented, -1 },                      // F_DIHRESVIOL
3946     BondedInteractions {unimplemented, -1 },                      // F_CONSTR
3947     BondedInteractions {unimplemented, -1 },                      // F_CONSTRNC
3948     BondedInteractions {unimplemented, -1 },                      // F_SETTLE
3949     BondedInteractions {unimplemented, -1 },                      // F_VSITE2
3950     BondedInteractions {unimplemented, -1 },                      // F_VSITE3
3951     BondedInteractions {unimplemented, -1 },                      // F_VSITE3FD
3952     BondedInteractions {unimplemented, -1 },                      // F_VSITE3FAD
3953     BondedInteractions {unimplemented, -1 },                      // F_VSITE3OUT
3954     BondedInteractions {unimplemented, -1 },                      // F_VSITE4FD
3955     BondedInteractions {unimplemented, -1 },                      // F_VSITE4FDN
3956     BondedInteractions {unimplemented, -1 },                      // F_VSITEN
3957     BondedInteractions {unimplemented, -1 },                      // F_COM_PULL
3958     BondedInteractions {unimplemented, -1 },                      // F_EQM
3959     BondedInteractions {unimplemented, -1 },                      // F_EPOT
3960     BondedInteractions {unimplemented, -1 },                      // F_EKIN
3961     BondedInteractions {unimplemented, -1 },                      // F_ETOT
3962     BondedInteractions {unimplemented, -1 },                      // F_ECONSERVED
3963     BondedInteractions {unimplemented, -1 },                      // F_TEMP
3964     BondedInteractions {unimplemented, -1 },                      // F_VTEMP_NOLONGERUSED
3965     BondedInteractions {unimplemented, -1 },                      // F_PDISPCORR
3966     BondedInteractions {unimplemented, -1 },                      // F_PRES
3967     BondedInteractions {unimplemented, -1 },                      // F_DVDL_CONSTR
3968     BondedInteractions {unimplemented, -1 },                      // F_DVDL
3969     BondedInteractions {unimplemented, -1 },                      // F_DKDL
3970     BondedInteractions {unimplemented, -1 },                      // F_DVDL_COUL
3971     BondedInteractions {unimplemented, -1 },                      // F_DVDL_VDW
3972     BondedInteractions {unimplemented, -1 },                      // F_DVDL_BONDED
3973     BondedInteractions {unimplemented, -1 },                      // F_DVDL_RESTRAINT
3974     BondedInteractions {unimplemented, -1 },                      // F_DVDL_TEMPERATURE
3975     };
3976
3977 /*! \brief List of instantiated BondedInteractions list */
3978 const gmx::EnumerationArray < BondedKernelFlavor, std::array < BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor =
3979 {
3980     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
3981     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
3982     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>
3983 };
3984
3985 //! \endcond
3986
3987 } // namespace
3988
3989 real calculateSimpleBond(const int ftype,
3990                          const int numForceatoms, const t_iatom forceatoms[],
3991                          const t_iparams forceparams[],
3992                          const rvec x[], rvec4 f[], rvec fshift[],
3993                          const struct t_pbc *pbc,
3994                          const struct t_graph gmx_unused *g,
3995                          const real lambda, real *dvdlambda,
3996                          const t_mdatoms *md, t_fcdata *fcd,
3997                          int gmx_unused *global_atom_index,
3998                          const BondedKernelFlavor bondedKernelFlavor)
3999 {
4000     const BondedInteractions &bonded =
4001         c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
4002
4003     real v = bonded.function(numForceatoms, forceatoms,
4004                              forceparams,
4005                              x, f, fshift,
4006                              pbc, g, lambda, dvdlambda,
4007                              md, fcd, global_atom_index);
4008
4009     return v;
4010 }
4011
4012 int nrnbIndex(int ftype)
4013 {
4014     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
4015 }