src/gromacs/listed-forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines low-level functions necessary for
  40  * computing energies and forces for listed interactions.
  41  *
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  *
  44  * \ingroup module_listed-forces
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "bonded.h"
  49
  50 #include "config.h"
  51
  52 #include <cassert>
  53 #include <cmath>
  54
  55 #include <algorithm>
  56
  57 #include "gromacs/listed-forces/pairs.h"
  58 #include "gromacs/math/functions.h"
  59 #include "gromacs/math/units.h"
  60 #include "gromacs/math/utilities.h"
  61 #include "gromacs/math/vec.h"
  62 #include "gromacs/pbcutil/ishift.h"
  63 #include "gromacs/pbcutil/mshift.h"
  64 #include "gromacs/pbcutil/pbc.h"
  65 #include "gromacs/pbcutil/pbc-simd.h"
  66 #include "gromacs/simd/simd.h"
  67 #include "gromacs/simd/simd_math.h"
  68 #include "gromacs/simd/vector_operations.h"
  69 #include "gromacs/utility/basedefinitions.h"
  70 #include "gromacs/utility/fatalerror.h"
  71 #include "gromacs/utility/real.h"
  72 #include "gromacs/utility/smalloc.h"
  73
  74 #include "listed-internal.h"
  75 #include "restcbt.h"
  76
  77 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  78
  79 /*! \brief Mysterious CMAP coefficient matrix */
  80 const int cmap_coeff_matrix[] = {
  81     1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
  82     0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
  83     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
  84     0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
  85     0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
  86     0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
  87     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
  88     0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
  89     0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
  90     0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
  91     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
  92     0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
  93     0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
  94     0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
  95     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
  96     0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
  97 };
  98
  99
 100 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 101  *
 102  * \todo This kind of code appears in many places. Consolidate it */
 103 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 104 {
 105     if (pbc)
 106     {
 107         return pbc_dx_aiuc(pbc, xi, xj, dx);
 108     }
 109     else
 110     {
 111         rvec_sub(xi, xj, dx);
 112         return CENTRAL;
 113     }
 114 }
 115
 116 /*! \brief Morse potential bond
 117  *
 118  * By Frank Everdij. Three parameters needed:
 119  *
 120  * b0 = equilibrium distance in nm
 121  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 122  * cb = well depth in kJ/mol
 123  *
 124  * Note: the potential is referenced to be +cb at infinite separation
 125  *       and zero at the equilibrium distance!
 126  */
 127 real morse_bonds(int nbonds,
 128                  const t_iatom forceatoms[], const t_iparams forceparams[],
 129                  const rvec x[], rvec4 f[], rvec fshift[],
 130                  const t_pbc *pbc, const t_graph *g,
 131                  real lambda, real *dvdlambda,
 132                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 133                  int gmx_unused *global_atom_index)
 134 {
 135     const real one = 1.0;
 136     const real two = 2.0;
 137     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
 138     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 139     rvec       dx;
 140     int        i, m, ki, type, ai, aj;
 141     ivec       dt;
 142
 143     vtot = 0.0;
 144     for (i = 0; (i < nbonds); )
 145     {
 146         type = forceatoms[i++];
 147         ai   = forceatoms[i++];
 148         aj   = forceatoms[i++];
 149
 150         b0A   = forceparams[type].morse.b0A;
 151         beA   = forceparams[type].morse.betaA;
 152         cbA   = forceparams[type].morse.cbA;
 153
 154         b0B   = forceparams[type].morse.b0B;
 155         beB   = forceparams[type].morse.betaB;
 156         cbB   = forceparams[type].morse.cbB;
 157
 158         L1 = one-lambda;                            /* 1 */
 159         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 160         be = L1*beA + lambda*beB;                   /* 3 */
 161         cb = L1*cbA + lambda*cbB;                   /* 3 */
 162
 163         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 164         dr2  = iprod(dx, dx);                       /*   5          */
 165         dr   = dr2*gmx::invsqrt(dr2);               /*  10          */
 166         temp = std::exp(-be*(dr-b0));               /*  12          */
 167
 168         if (temp == one)
 169         {
 170             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 171             *dvdlambda += cbB-cbA;
 172             continue;
 173         }
 174
 175         omtemp    = one-temp;                                                                                        /*   1          */
 176         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 177         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 178         fbond     = -two*be*temp*cbomtemp*gmx::invsqrt(dr2);                                                         /*   9          */
 179         vtot     += vbond;                                                                                           /*   1          */
 180
 181         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 182
 183         if (g)
 184         {
 185             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 186             ki = IVEC2IS(dt);
 187         }
 188
 189         for (m = 0; (m < DIM); m++)                    /*  15          */
 190         {
 191             fij                 = fbond*dx[m];
 192             f[ai][m]           += fij;
 193             f[aj][m]           -= fij;
 194             fshift[ki][m]      += fij;
 195             fshift[CENTRAL][m] -= fij;
 196         }
 197     }                                         /*  83 TOTAL    */
 198     return vtot;
 199 }
 200
 201 //! \cond
 202 real cubic_bonds(int nbonds,
 203                  const t_iatom forceatoms[], const t_iparams forceparams[],
 204                  const rvec x[], rvec4 f[], rvec fshift[],
 205                  const t_pbc *pbc, const t_graph *g,
 206                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 207                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 208                  int gmx_unused *global_atom_index)
 209 {
 210     const real three = 3.0;
 211     const real two   = 2.0;
 212     real       kb, b0, kcub;
 213     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
 214     rvec       dx;
 215     int        i, m, ki, type, ai, aj;
 216     ivec       dt;
 217
 218     vtot = 0.0;
 219     for (i = 0; (i < nbonds); )
 220     {
 221         type = forceatoms[i++];
 222         ai   = forceatoms[i++];
 223         aj   = forceatoms[i++];
 224
 225         b0   = forceparams[type].cubic.b0;
 226         kb   = forceparams[type].cubic.kb;
 227         kcub = forceparams[type].cubic.kcub;
 228
 229         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 230         dr2  = iprod(dx, dx);                           /*   5          */
 231
 232         if (dr2 == 0.0)
 233         {
 234             continue;
 235         }
 236
 237         dr         = dr2*gmx::invsqrt(dr2);                  /*  10          */
 238         dist       = dr-b0;
 239         kdist      = kb*dist;
 240         kdist2     = kdist*dist;
 241
 242         vbond      = kdist2 + kcub*kdist2*dist;
 243         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 244
 245         vtot      += vbond;   /* 21 */
 246
 247         if (g)
 248         {
 249             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 250             ki = IVEC2IS(dt);
 251         }
 252         for (m = 0; (m < DIM); m++)                    /*  15          */
 253         {
 254             fij                 = fbond*dx[m];
 255             f[ai][m]           += fij;
 256             f[aj][m]           -= fij;
 257             fshift[ki][m]      += fij;
 258             fshift[CENTRAL][m] -= fij;
 259         }
 260     }                                         /*  54 TOTAL    */
 261     return vtot;
 262 }
 263
 264 real FENE_bonds(int nbonds,
 265                 const t_iatom forceatoms[], const t_iparams forceparams[],
 266                 const rvec x[], rvec4 f[], rvec fshift[],
 267                 const t_pbc *pbc, const t_graph *g,
 268                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 269                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 270                 int *global_atom_index)
 271 {
 272     const real half = 0.5;
 273     const real one  = 1.0;
 274     real       bm, kb;
 275     real       dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
 276     rvec       dx;
 277     int        i, m, ki, type, ai, aj;
 278     ivec       dt;
 279
 280     vtot = 0.0;
 281     for (i = 0; (i < nbonds); )
 282     {
 283         type = forceatoms[i++];
 284         ai   = forceatoms[i++];
 285         aj   = forceatoms[i++];
 286
 287         bm   = forceparams[type].fene.bm;
 288         kb   = forceparams[type].fene.kb;
 289
 290         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 291         dr2  = iprod(dx, dx);                           /*   5          */
 292
 293         if (dr2 == 0.0)
 294         {
 295             continue;
 296         }
 297
 298         bm2 = bm*bm;
 299
 300         if (dr2 >= bm2)
 301         {
 302             gmx_fatal(FARGS,
 303                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 304                       dr2, bm2,
 305                       glatnr(global_atom_index, ai),
 306                       glatnr(global_atom_index, aj));
 307         }
 308
 309         omdr2obm2  = one - dr2/bm2;
 310
 311         vbond      = -half*kb*bm2*std::log(omdr2obm2);
 312         fbond      = -kb/omdr2obm2;
 313
 314         vtot      += vbond;   /* 35 */
 315
 316         if (g)
 317         {
 318             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 319             ki = IVEC2IS(dt);
 320         }
 321         for (m = 0; (m < DIM); m++)                    /*  15          */
 322         {
 323             fij                 = fbond*dx[m];
 324             f[ai][m]           += fij;
 325             f[aj][m]           -= fij;
 326             fshift[ki][m]      += fij;
 327             fshift[CENTRAL][m] -= fij;
 328         }
 329     }                                         /*  58 TOTAL    */
 330     return vtot;
 331 }
 332
 333 static real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 334                      real *V, real *F)
 335 {
 336     const real half = 0.5;
 337     real       L1, kk, x0, dx, dx2;
 338     real       v, f, dvdlambda;
 339
 340     L1    = 1.0-lambda;
 341     kk    = L1*kA+lambda*kB;
 342     x0    = L1*xA+lambda*xB;
 343
 344     dx    = x-x0;
 345     dx2   = dx*dx;
 346
 347     f          = -kk*dx;
 348     v          = half*kk*dx2;
 349     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 350
 351     *F    = f;
 352     *V    = v;
 353
 354     return dvdlambda;
 355
 356     /* That was 19 flops */
 357 }
 358
 359
 360 real bonds(int nbonds,
 361            const t_iatom forceatoms[], const t_iparams forceparams[],
 362            const rvec x[], rvec4 f[], rvec fshift[],
 363            const t_pbc *pbc, const t_graph *g,
 364            real lambda, real *dvdlambda,
 365            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 366            int gmx_unused *global_atom_index)
 367 {
 368     int  i, m, ki, ai, aj, type;
 369     real dr, dr2, fbond, vbond, fij, vtot;
 370     rvec dx;
 371     ivec dt;
 372
 373     vtot = 0.0;
 374     for (i = 0; (i < nbonds); )
 375     {
 376         type = forceatoms[i++];
 377         ai   = forceatoms[i++];
 378         aj   = forceatoms[i++];
 379
 380         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 381         dr2  = iprod(dx, dx);                       /*   5              */
 382         dr   = std::sqrt(dr2);                      /*  10              */
 383
 384         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 385                                forceparams[type].harmonic.krB,
 386                                forceparams[type].harmonic.rA,
 387                                forceparams[type].harmonic.rB,
 388                                dr, lambda, &vbond, &fbond); /*  19  */
 389
 390         if (dr2 == 0.0)
 391         {
 392             continue;
 393         }
 394
 395
 396         vtot  += vbond;             /* 1*/
 397         fbond *= gmx::invsqrt(dr2); /*   6              */
 398         if (g)
 399         {
 400             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 401             ki = IVEC2IS(dt);
 402         }
 403         for (m = 0; (m < DIM); m++)     /*  15          */
 404         {
 405             fij                 = fbond*dx[m];
 406             f[ai][m]           += fij;
 407             f[aj][m]           -= fij;
 408             fshift[ki][m]      += fij;
 409             fshift[CENTRAL][m] -= fij;
 410         }
 411     }               /* 59 TOTAL */
 412     return vtot;
 413 }
 414
 415 real restraint_bonds(int nbonds,
 416                      const t_iatom forceatoms[], const t_iparams forceparams[],
 417                      const rvec x[], rvec4 f[], rvec fshift[],
 418                      const t_pbc *pbc, const t_graph *g,
 419                      real lambda, real *dvdlambda,
 420                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 421                      int gmx_unused *global_atom_index)
 422 {
 423     int  i, m, ki, ai, aj, type;
 424     real dr, dr2, fbond, vbond, fij, vtot;
 425     real L1;
 426     real low, dlow, up1, dup1, up2, dup2, k, dk;
 427     real drh, drh2;
 428     rvec dx;
 429     ivec dt;
 430
 431     L1   = 1.0 - lambda;
 432
 433     vtot = 0.0;
 434     for (i = 0; (i < nbonds); )
 435     {
 436         type = forceatoms[i++];
 437         ai   = forceatoms[i++];
 438         aj   = forceatoms[i++];
 439
 440         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 441         dr2  = iprod(dx, dx);                       /*   5              */
 442         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
 443
 444         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 445         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 446         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 447         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 448         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 449         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 450         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 451         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 452         /* 24 */
 453
 454         if (dr < low)
 455         {
 456             drh         = dr - low;
 457             drh2        = drh*drh;
 458             vbond       = 0.5*k*drh2;
 459             fbond       = -k*drh;
 460             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 461         } /* 11 */
 462         else if (dr <= up1)
 463         {
 464             vbond = 0;
 465             fbond = 0;
 466         }
 467         else if (dr <= up2)
 468         {
 469             drh         = dr - up1;
 470             drh2        = drh*drh;
 471             vbond       = 0.5*k*drh2;
 472             fbond       = -k*drh;
 473             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 474         } /* 11 */
 475         else
 476         {
 477             drh         = dr - up2;
 478             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 479             fbond       = -k*(up2 - up1);
 480             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 481                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 482                 - k*(up2 - up1)*dup2;
 483         }
 484
 485         if (dr2 == 0.0)
 486         {
 487             continue;
 488         }
 489
 490         vtot  += vbond;             /* 1*/
 491         fbond *= gmx::invsqrt(dr2); /*   6              */
 492         if (g)
 493         {
 494             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 495             ki = IVEC2IS(dt);
 496         }
 497         for (m = 0; (m < DIM); m++)             /*  15          */
 498         {
 499             fij                 = fbond*dx[m];
 500             f[ai][m]           += fij;
 501             f[aj][m]           -= fij;
 502             fshift[ki][m]      += fij;
 503             fshift[CENTRAL][m] -= fij;
 504         }
 505     }                   /* 59 TOTAL     */
 506
 507     return vtot;
 508 }
 509
 510 real polarize(int nbonds,
 511               const t_iatom forceatoms[], const t_iparams forceparams[],
 512               const rvec x[], rvec4 f[], rvec fshift[],
 513               const t_pbc *pbc, const t_graph *g,
 514               real lambda, real *dvdlambda,
 515               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 516               int gmx_unused *global_atom_index)
 517 {
 518     int  i, m, ki, ai, aj, type;
 519     real dr, dr2, fbond, vbond, fij, vtot, ksh;
 520     rvec dx;
 521     ivec dt;
 522
 523     vtot = 0.0;
 524     for (i = 0; (i < nbonds); )
 525     {
 526         type = forceatoms[i++];
 527         ai   = forceatoms[i++];
 528         aj   = forceatoms[i++];
 529         ksh  = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 530
 531         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 532         dr2  = iprod(dx, dx);                                               /*   5              */
 533         dr   = std::sqrt(dr2);                                              /*  10              */
 534
 535         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 536
 537         if (dr2 == 0.0)
 538         {
 539             continue;
 540         }
 541
 542         vtot  += vbond;             /* 1*/
 543         fbond *= gmx::invsqrt(dr2); /*   6              */
 544
 545         if (g)
 546         {
 547             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 548             ki = IVEC2IS(dt);
 549         }
 550         for (m = 0; (m < DIM); m++)     /*  15          */
 551         {
 552             fij                 = fbond*dx[m];
 553             f[ai][m]           += fij;
 554             f[aj][m]           -= fij;
 555             fshift[ki][m]      += fij;
 556             fshift[CENTRAL][m] -= fij;
 557         }
 558     }               /* 59 TOTAL */
 559     return vtot;
 560 }
 561
 562 real anharm_polarize(int nbonds,
 563                      const t_iatom forceatoms[], const t_iparams forceparams[],
 564                      const rvec x[], rvec4 f[], rvec fshift[],
 565                      const t_pbc *pbc, const t_graph *g,
 566                      real lambda, real *dvdlambda,
 567                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 568                      int gmx_unused *global_atom_index)
 569 {
 570     int  i, m, ki, ai, aj, type;
 571     real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
 572     rvec dx;
 573     ivec dt;
 574
 575     vtot = 0.0;
 576     for (i = 0; (i < nbonds); )
 577     {
 578         type  = forceatoms[i++];
 579         ai    = forceatoms[i++];
 580         aj    = forceatoms[i++];
 581         ksh   = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 582         khyp  = forceparams[type].anharm_polarize.khyp;
 583         drcut = forceparams[type].anharm_polarize.drcut;
 584
 585         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 586         dr2  = iprod(dx, dx);                                               /*   5              */
 587         dr   = dr2*gmx::invsqrt(dr2);                                       /*  10              */
 588
 589         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 590
 591         if (dr2 == 0.0)
 592         {
 593             continue;
 594         }
 595
 596         if (dr > drcut)
 597         {
 598             ddr    = dr-drcut;
 599             ddr3   = ddr*ddr*ddr;
 600             vbond += khyp*ddr*ddr3;
 601             fbond -= 4*khyp*ddr3;
 602         }
 603         fbond *= gmx::invsqrt(dr2); /*   6              */
 604         vtot  += vbond;             /* 1*/
 605
 606         if (g)
 607         {
 608             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 609             ki = IVEC2IS(dt);
 610         }
 611         for (m = 0; (m < DIM); m++)     /*  15          */
 612         {
 613             fij                 = fbond*dx[m];
 614             f[ai][m]           += fij;
 615             f[aj][m]           -= fij;
 616             fshift[ki][m]      += fij;
 617             fshift[CENTRAL][m] -= fij;
 618         }
 619     }               /* 72 TOTAL */
 620     return vtot;
 621 }
 622
 623 real water_pol(int nbonds,
 624                const t_iatom forceatoms[], const t_iparams forceparams[],
 625                const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
 626                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 627                real gmx_unused lambda, real gmx_unused *dvdlambda,
 628                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 629                int gmx_unused *global_atom_index)
 630 {
 631     /* This routine implements anisotropic polarizibility for water, through
 632      * a shell connected to a dummy with spring constant that differ in the
 633      * three spatial dimensions in the molecular frame.
 634      */
 635     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 636     ivec dt;
 637     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 638     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 639
 640     vtot = 0.0;
 641     if (nbonds > 0)
 642     {
 643         type0  = forceatoms[0];
 644         aS     = forceatoms[5];
 645         qS     = md->chargeA[aS];
 646         kk[XX] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 647         kk[YY] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 648         kk[ZZ] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 649         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 650         for (i = 0; (i < nbonds); i += 6)
 651         {
 652             type = forceatoms[i];
 653             if (type != type0)
 654             {
 655                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 656                           type, type0, __FILE__, __LINE__);
 657             }
 658             aO   = forceatoms[i+1];
 659             aH1  = forceatoms[i+2];
 660             aH2  = forceatoms[i+3];
 661             aD   = forceatoms[i+4];
 662             aS   = forceatoms[i+5];
 663
 664             /* Compute vectors describing the water frame */
 665             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 666             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 667             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 668             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 669             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 670             cprod(dOH1, dOH2, nW);
 671
 672             /* Compute inverse length of normal vector
 673              * (this one could be precomputed, but I'm too lazy now)
 674              */
 675             r_nW = gmx::invsqrt(iprod(nW, nW));
 676             /* This is for precision, but does not make a big difference,
 677              * it can go later.
 678              */
 679             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 680
 681             /* Normalize the vectors in the water frame */
 682             svmul(r_nW, nW, nW);
 683             svmul(r_HH, dHH, dHH);
 684             svmul(r_OD, dOD, dOD);
 685
 686             /* Compute displacement of shell along components of the vector */
 687             dx[ZZ] = iprod(dDS, dOD);
 688             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 689             for (m = 0; (m < DIM); m++)
 690             {
 691                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 692             }
 693
 694             /*dx[XX] = iprod(dDS,nW);
 695                dx[YY] = iprod(dDS,dHH);*/
 696             dx[XX] = iprod(proj, nW);
 697             for (m = 0; (m < DIM); m++)
 698             {
 699                 proj[m] -= dx[XX]*nW[m];
 700             }
 701             dx[YY] = iprod(proj, dHH);
 702             /* Now compute the forces and energy */
 703             kdx[XX] = kk[XX]*dx[XX];
 704             kdx[YY] = kk[YY]*dx[YY];
 705             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 706             vtot   += iprod(dx, kdx);
 707
 708             if (g)
 709             {
 710                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 711                 ki = IVEC2IS(dt);
 712             }
 713
 714             for (m = 0; (m < DIM); m++)
 715             {
 716                 /* This is a tensor operation but written out for speed */
 717                 tx                  =  nW[m]*kdx[XX];
 718                 ty                  = dHH[m]*kdx[YY];
 719                 tz                  = dOD[m]*kdx[ZZ];
 720                 fij                 = -tx-ty-tz;
 721                 f[aS][m]           += fij;
 722                 f[aD][m]           -= fij;
 723                 fshift[ki][m]      += fij;
 724                 fshift[CENTRAL][m] -= fij;
 725             }
 726         }
 727     }
 728     return 0.5*vtot;
 729 }
 730
 731 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 732                        const t_pbc *pbc, real qq,
 733                        rvec fshift[], real afac)
 734 {
 735     rvec r12;
 736     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 737     int  m, t;
 738
 739     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 740
 741     r12sq  = iprod(r12, r12);                                     /*  5 */
 742     r12_1  = gmx::invsqrt(r12sq);                                 /*  5 */
 743     r12bar = afac/r12_1;                                          /*  5 */
 744     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 745     ebar   = std::exp(-r12bar);                                   /*  5 */
 746     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 747     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 748
 749     for (m = 0; (m < DIM); m++)
 750     {
 751         fff                 = fscal*r12[m];
 752         fi[m]              += fff;
 753         fj[m]              -= fff;
 754         fshift[t][m]       += fff;
 755         fshift[CENTRAL][m] -= fff;
 756     }             /* 15 */
 757
 758     return v0*v1; /* 1 */
 759     /* 54 */
 760 }
 761
 762 real thole_pol(int nbonds,
 763                const t_iatom forceatoms[], const t_iparams forceparams[],
 764                const rvec x[], rvec4 f[], rvec fshift[],
 765                const t_pbc *pbc, const t_graph gmx_unused *g,
 766                real gmx_unused lambda, real gmx_unused *dvdlambda,
 767                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 768                int gmx_unused *global_atom_index)
 769 {
 770     /* Interaction between two pairs of particles with opposite charge */
 771     int        i, type, a1, da1, a2, da2;
 772     real       q1, q2, qq, a, al1, al2, afac;
 773     real       V             = 0;
 774
 775     for (i = 0; (i < nbonds); )
 776     {
 777         type  = forceatoms[i++];
 778         a1    = forceatoms[i++];
 779         da1   = forceatoms[i++];
 780         a2    = forceatoms[i++];
 781         da2   = forceatoms[i++];
 782         q1    = md->chargeA[da1];
 783         q2    = md->chargeA[da2];
 784         a     = forceparams[type].thole.a;
 785         al1   = forceparams[type].thole.alpha1;
 786         al2   = forceparams[type].thole.alpha2;
 787         qq    = q1*q2;
 788         afac  = a*gmx::invsixthroot(al1*al2);
 789         V    += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 790         V    += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 791         V    += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 792         V    += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 793     }
 794     /* 290 flops */
 795     return V;
 796 }
 797
 798 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 799                 rvec r_ij, rvec r_kj, real *costh,
 800                 int *t1, int *t2)
 801 /* Return value is the angle between the bonds i-j and j-k */
 802 {
 803     /* 41 FLOPS */
 804     real th;
 805
 806     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 807     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 808
 809     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 810     th     = std::acos(*costh);            /* 10                */
 811     /* 41 TOTAL */
 812     return th;
 813 }
 814
 815 real angles(int nbonds,
 816             const t_iatom forceatoms[], const t_iparams forceparams[],
 817             const rvec x[], rvec4 f[], rvec fshift[],
 818             const t_pbc *pbc, const t_graph *g,
 819             real lambda, real *dvdlambda,
 820             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 821             int gmx_unused *global_atom_index)
 822 {
 823     int  i, ai, aj, ak, t1, t2, type;
 824     rvec r_ij, r_kj;
 825     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 826     ivec jt, dt_ij, dt_kj;
 827
 828     vtot = 0.0;
 829     for (i = 0; i < nbonds; )
 830     {
 831         type = forceatoms[i++];
 832         ai   = forceatoms[i++];
 833         aj   = forceatoms[i++];
 834         ak   = forceatoms[i++];
 835
 836         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 837                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 838
 839         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 840                                forceparams[type].harmonic.krB,
 841                                forceparams[type].harmonic.rA*DEG2RAD,
 842                                forceparams[type].harmonic.rB*DEG2RAD,
 843                                theta, lambda, &va, &dVdt);  /*  21  */
 844         vtot += va;
 845
 846         cos_theta2 = gmx::square(cos_theta);
 847         if (cos_theta2 < 1)
 848         {
 849             int  m;
 850             real st, sth;
 851             real cik, cii, ckk;
 852             real nrkj2, nrij2;
 853             real nrkj_1, nrij_1;
 854             rvec f_i, f_j, f_k;
 855
 856             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
 857             sth   = st*cos_theta;                      /*   1           */
 858             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
 859             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
 860
 861             nrij_1 = gmx::invsqrt(nrij2);              /*  10           */
 862             nrkj_1 = gmx::invsqrt(nrkj2);              /*  10           */
 863
 864             cik = st*nrij_1*nrkj_1;                    /*   2           */
 865             cii = sth*nrij_1*nrij_1;                   /*   2           */
 866             ckk = sth*nrkj_1*nrkj_1;                   /*   2           */
 867
 868             for (m = 0; m < DIM; m++)
 869             {           /*  39          */
 870                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 871                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 872                 f_j[m]    = -f_i[m] - f_k[m];
 873                 f[ai][m] += f_i[m];
 874                 f[aj][m] += f_j[m];
 875                 f[ak][m] += f_k[m];
 876             }
 877             if (g != nullptr)
 878             {
 879                 copy_ivec(SHIFT_IVEC(g, aj), jt);
 880
 881                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
 882                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
 883                 t1 = IVEC2IS(dt_ij);
 884                 t2 = IVEC2IS(dt_kj);
 885             }
 886             rvec_inc(fshift[t1], f_i);
 887             rvec_inc(fshift[CENTRAL], f_j);
 888             rvec_inc(fshift[t2], f_k);
 889         }                                           /* 161 TOTAL        */
 890     }
 891
 892     return vtot;
 893 }
 894
 895 #if GMX_SIMD_HAVE_REAL
 896
 897 /* As angles, but using SIMD to calculate many angles at once.
 898  * This routines does not calculate energies and shift forces.
 899  */
 900 void
 901 angles_noener_simd(int nbonds,
 902                    const t_iatom forceatoms[], const t_iparams forceparams[],
 903                    const rvec x[], rvec4 f[],
 904                    const t_pbc *pbc, const t_graph gmx_unused *g,
 905                    real gmx_unused lambda,
 906                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 907                    int gmx_unused *global_atom_index)
 908 {
 909     const int            nfa1 = 4;
 910     int                  i, iu, s;
 911     int                  type;
 912     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
 913     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
 914     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
 915     alignas(GMX_SIMD_ALIGNMENT) real            coeff[2*GMX_SIMD_REAL_WIDTH];
 916     SimdReal             deg2rad_S(DEG2RAD);
 917     SimdReal             xi_S, yi_S, zi_S;
 918     SimdReal             xj_S, yj_S, zj_S;
 919     SimdReal             xk_S, yk_S, zk_S;
 920     SimdReal             k_S, theta0_S;
 921     SimdReal             rijx_S, rijy_S, rijz_S;
 922     SimdReal             rkjx_S, rkjy_S, rkjz_S;
 923     SimdReal             one_S(1.0);
 924     SimdReal             min_one_plus_eps_S(-1.0 + 2.0*GMX_REAL_EPS); // Smallest number > -1
 925
 926     SimdReal             rij_rkj_S;
 927     SimdReal             nrij2_S, nrij_1_S;
 928     SimdReal             nrkj2_S, nrkj_1_S;
 929     SimdReal             cos_S, invsin_S;
 930     SimdReal             theta_S;
 931     SimdReal             st_S, sth_S;
 932     SimdReal             cik_S, cii_S, ckk_S;
 933     SimdReal             f_ix_S, f_iy_S, f_iz_S;
 934     SimdReal             f_kx_S, f_ky_S, f_kz_S;
 935     alignas(GMX_SIMD_ALIGNMENT) real    pbc_simd[9*GMX_SIMD_REAL_WIDTH];
 936
 937     set_pbc_simd(pbc, pbc_simd);
 938
 939     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
 940     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
 941     {
 942         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
 943          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
 944          */
 945         iu = i;
 946         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
 947         {
 948             type  = forceatoms[iu];
 949             ai[s] = forceatoms[iu+1];
 950             aj[s] = forceatoms[iu+2];
 951             ak[s] = forceatoms[iu+3];
 952
 953             /* At the end fill the arrays with the last atoms and 0 params */
 954             if (i + s*nfa1 < nbonds)
 955             {
 956                 coeff[s]                     = forceparams[type].harmonic.krA;
 957                 coeff[GMX_SIMD_REAL_WIDTH+s] = forceparams[type].harmonic.rA;
 958
 959                 if (iu + nfa1 < nbonds)
 960                 {
 961                     iu += nfa1;
 962                 }
 963             }
 964             else
 965             {
 966                 coeff[s]                     = 0;
 967                 coeff[GMX_SIMD_REAL_WIDTH+s] = 0;
 968             }
 969         }
 970
 971         /* Store the non PBC corrected distances packed and aligned */
 972         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
 973         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
 974         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
 975         rijx_S = xi_S - xj_S;
 976         rijy_S = yi_S - yj_S;
 977         rijz_S = zi_S - zj_S;
 978         rkjx_S = xk_S - xj_S;
 979         rkjy_S = yk_S - yj_S;
 980         rkjz_S = zk_S - zj_S;
 981
 982         k_S       = load<SimdReal>(coeff);
 983         theta0_S  = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
 984
 985         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
 986         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
 987
 988         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
 989                           rkjx_S, rkjy_S, rkjz_S);
 990
 991         nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
 992         nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
 993
 994         nrij_1_S  = invsqrt(nrij2_S);
 995         nrkj_1_S  = invsqrt(nrkj2_S);
 996
 997         cos_S     = rij_rkj_S * nrij_1_S * nrkj_1_S;
 998
 999         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1000          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1001          * This also ensures that rounding errors would cause the argument
1002          * of simdAcos to be < -1.
1003          * Note that we do not take precautions for cos(0)=1, so the outer
1004          * atoms in an angle should not be on top of each other.
1005          */
1006         cos_S     = max(cos_S, min_one_plus_eps_S);
1007
1008         theta_S   = acos(cos_S);
1009
1010         invsin_S  = invsqrt( one_S - cos_S * cos_S );
1011
1012         st_S      = k_S * (theta0_S - theta_S) * invsin_S;
1013         sth_S     = st_S * cos_S;
1014
1015         cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1016         cii_S     = sth_S * nrij_1_S * nrij_1_S;
1017         ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1018
1019         f_ix_S    = cii_S * rijx_S;
1020         f_ix_S    = fnma(cik_S, rkjx_S, f_ix_S);
1021         f_iy_S    = cii_S * rijy_S;
1022         f_iy_S    = fnma(cik_S, rkjy_S, f_iy_S);
1023         f_iz_S    = cii_S * rijz_S;
1024         f_iz_S    = fnma(cik_S, rkjz_S, f_iz_S);
1025         f_kx_S    = ckk_S * rkjx_S;
1026         f_kx_S    = fnma(cik_S, rijx_S, f_kx_S);
1027         f_ky_S    = ckk_S * rkjy_S;
1028         f_ky_S    = fnma(cik_S, rijy_S, f_ky_S);
1029         f_kz_S    = ckk_S * rkjz_S;
1030         f_kz_S    = fnma(cik_S, rijz_S, f_kz_S);
1031
1032         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1033         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1034         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1035     }
1036 }
1037
1038 #endif // GMX_SIMD_HAVE_REAL
1039
1040 real linear_angles(int nbonds,
1041                    const t_iatom forceatoms[], const t_iparams forceparams[],
1042                    const rvec x[], rvec4 f[], rvec fshift[],
1043                    const t_pbc *pbc, const t_graph *g,
1044                    real lambda, real *dvdlambda,
1045                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1046                    int gmx_unused *global_atom_index)
1047 {
1048     int  i, m, ai, aj, ak, t1, t2, type;
1049     rvec f_i, f_j, f_k;
1050     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1051     ivec jt, dt_ij, dt_kj;
1052     rvec r_ij, r_kj, r_ik, dx;
1053
1054     L1   = 1-lambda;
1055     vtot = 0.0;
1056     for (i = 0; (i < nbonds); )
1057     {
1058         type = forceatoms[i++];
1059         ai   = forceatoms[i++];
1060         aj   = forceatoms[i++];
1061         ak   = forceatoms[i++];
1062
1063         kA   = forceparams[type].linangle.klinA;
1064         kB   = forceparams[type].linangle.klinB;
1065         klin = L1*kA + lambda*kB;
1066
1067         aA   = forceparams[type].linangle.aA;
1068         aB   = forceparams[type].linangle.aB;
1069         a    = L1*aA+lambda*aB;
1070         b    = 1-a;
1071
1072         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1073         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1074         rvec_sub(r_ij, r_kj, r_ik);
1075
1076         dr2 = 0;
1077         for (m = 0; (m < DIM); m++)
1078         {
1079             dr        = -a * r_ij[m] - b * r_kj[m];
1080             dr2      += dr*dr;
1081             dx[m]     = dr;
1082             f_i[m]    = a*klin*dr;
1083             f_k[m]    = b*klin*dr;
1084             f_j[m]    = -(f_i[m]+f_k[m]);
1085             f[ai][m] += f_i[m];
1086             f[aj][m] += f_j[m];
1087             f[ak][m] += f_k[m];
1088         }
1089         va          = 0.5*klin*dr2;
1090         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1091
1092         vtot += va;
1093
1094         if (g)
1095         {
1096             copy_ivec(SHIFT_IVEC(g, aj), jt);
1097
1098             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1099             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1100             t1 = IVEC2IS(dt_ij);
1101             t2 = IVEC2IS(dt_kj);
1102         }
1103         rvec_inc(fshift[t1], f_i);
1104         rvec_inc(fshift[CENTRAL], f_j);
1105         rvec_inc(fshift[t2], f_k);
1106     }                                         /* 57 TOTAL       */
1107     return vtot;
1108 }
1109
1110 real urey_bradley(int nbonds,
1111                   const t_iatom forceatoms[], const t_iparams forceparams[],
1112                   const rvec x[], rvec4 f[], rvec fshift[],
1113                   const t_pbc *pbc, const t_graph *g,
1114                   real lambda, real *dvdlambda,
1115                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1116                   int gmx_unused *global_atom_index)
1117 {
1118     int  i, m, ai, aj, ak, t1, t2, type, ki;
1119     rvec r_ij, r_kj, r_ik;
1120     real cos_theta, cos_theta2, theta;
1121     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1122     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1123     ivec jt, dt_ij, dt_kj, dt_ik;
1124
1125     vtot = 0.0;
1126     for (i = 0; (i < nbonds); )
1127     {
1128         type  = forceatoms[i++];
1129         ai    = forceatoms[i++];
1130         aj    = forceatoms[i++];
1131         ak    = forceatoms[i++];
1132         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1133         kthA  = forceparams[type].u_b.kthetaA;
1134         r13A  = forceparams[type].u_b.r13A;
1135         kUBA  = forceparams[type].u_b.kUBA;
1136         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1137         kthB  = forceparams[type].u_b.kthetaB;
1138         r13B  = forceparams[type].u_b.r13B;
1139         kUBB  = forceparams[type].u_b.kUBB;
1140
1141         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1142                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1143
1144         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1145         vtot       += va;
1146
1147         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1148         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1149         dr   = dr2*gmx::invsqrt(dr2);                                               /*  10              */
1150
1151         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1152
1153         cos_theta2 = gmx::square(cos_theta);                                        /*   1              */
1154         if (cos_theta2 < 1)
1155         {
1156             real st, sth;
1157             real cik, cii, ckk;
1158             real nrkj2, nrij2;
1159             rvec f_i, f_j, f_k;
1160
1161             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1162             sth   = st*cos_theta;                      /*   1           */
1163             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1164             nrij2 = iprod(r_ij, r_ij);
1165
1166             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1167             cii = sth/nrij2;                    /*  10          */
1168             ckk = sth/nrkj2;                    /*  10          */
1169
1170             for (m = 0; (m < DIM); m++)         /*  39          */
1171             {
1172                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1173                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1174                 f_j[m]    = -f_i[m]-f_k[m];
1175                 f[ai][m] += f_i[m];
1176                 f[aj][m] += f_j[m];
1177                 f[ak][m] += f_k[m];
1178             }
1179             if (g)
1180             {
1181                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1182
1183                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1184                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1185                 t1 = IVEC2IS(dt_ij);
1186                 t2 = IVEC2IS(dt_kj);
1187             }
1188             rvec_inc(fshift[t1], f_i);
1189             rvec_inc(fshift[CENTRAL], f_j);
1190             rvec_inc(fshift[t2], f_k);
1191         }                                       /* 161 TOTAL    */
1192         /* Time for the bond calculations */
1193         if (dr2 == 0.0)
1194         {
1195             continue;
1196         }
1197
1198         vtot  += vbond;             /* 1*/
1199         fbond *= gmx::invsqrt(dr2); /*   6              */
1200
1201         if (g)
1202         {
1203             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1204             ki = IVEC2IS(dt_ik);
1205         }
1206         for (m = 0; (m < DIM); m++)     /*  15          */
1207         {
1208             fik                 = fbond*r_ik[m];
1209             f[ai][m]           += fik;
1210             f[ak][m]           -= fik;
1211             fshift[ki][m]      += fik;
1212             fshift[CENTRAL][m] -= fik;
1213         }
1214     }
1215     return vtot;
1216 }
1217
1218 #if GMX_SIMD_HAVE_REAL
1219
1220 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1221  * This routines does not calculate energies and shift forces.
1222  */
1223 void urey_bradley_noener_simd(int nbonds,
1224                               const t_iatom forceatoms[], const t_iparams forceparams[],
1225                               const rvec x[], rvec4 f[],
1226                               const t_pbc *pbc, const t_graph gmx_unused *g,
1227                               real gmx_unused lambda,
1228                               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1229                               int gmx_unused *global_atom_index)
1230 {
1231     constexpr int            nfa1 = 4;
1232     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1233     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1234     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1235     alignas(GMX_SIMD_ALIGNMENT) real            coeff[4*GMX_SIMD_REAL_WIDTH];
1236     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1237
1238     set_pbc_simd(pbc, pbc_simd);
1239
1240     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1241     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH*nfa1)
1242     {
1243         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1244          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1245          */
1246         int iu = i;
1247         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1248         {
1249             const int type                     = forceatoms[iu];
1250             ai[s] = forceatoms[iu+1];
1251             aj[s] = forceatoms[iu+2];
1252             ak[s] = forceatoms[iu+3];
1253
1254             /* At the end fill the arrays with the last atoms and 0 params */
1255             if (i + s*nfa1 < nbonds)
1256             {
1257                 coeff[s]                       = forceparams[type].u_b.kthetaA;
1258                 coeff[GMX_SIMD_REAL_WIDTH+s]   = forceparams[type].u_b.thetaA;
1259                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = forceparams[type].u_b.kUBA;
1260                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = forceparams[type].u_b.r13A;
1261
1262                 if (iu + nfa1 < nbonds)
1263                 {
1264                     iu += nfa1;
1265                 }
1266             }
1267             else
1268             {
1269                 coeff[s]                       = 0;
1270                 coeff[GMX_SIMD_REAL_WIDTH+s]   = 0;
1271                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = 0;
1272                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = 0;
1273             }
1274         }
1275
1276         SimdReal xi_S, yi_S, zi_S;
1277         SimdReal xj_S, yj_S, zj_S;
1278         SimdReal xk_S, yk_S, zk_S;
1279
1280         /* Store the non PBC corrected distances packed and aligned */
1281         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1282         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1283         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1284         SimdReal       rijx_S = xi_S - xj_S;
1285         SimdReal       rijy_S = yi_S - yj_S;
1286         SimdReal       rijz_S = zi_S - zj_S;
1287         SimdReal       rkjx_S = xk_S - xj_S;
1288         SimdReal       rkjy_S = yk_S - yj_S;
1289         SimdReal       rkjz_S = zk_S - zj_S;
1290         SimdReal       rikx_S = xi_S - xk_S;
1291         SimdReal       riky_S = yi_S - yk_S;
1292         SimdReal       rikz_S = zi_S - zk_S;
1293
1294         const SimdReal ktheta_S = load<SimdReal>(coeff);
1295         const SimdReal theta0_S = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1296         const SimdReal kUB_S    = load<SimdReal>(coeff+2*GMX_SIMD_REAL_WIDTH);
1297         const SimdReal r13_S    = load<SimdReal>(coeff+3*GMX_SIMD_REAL_WIDTH);
1298
1299         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1300         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1301         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1302
1303         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
1304                                          rkjx_S, rkjy_S, rkjz_S);
1305
1306         const SimdReal dr2_S     = iprod(rikx_S, riky_S, rikz_S,
1307                                          rikx_S, riky_S, rikz_S);
1308
1309         const SimdReal nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
1310         const SimdReal nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
1311
1312         const SimdReal nrij_1_S  = invsqrt(nrij2_S);
1313         const SimdReal nrkj_1_S  = invsqrt(nrkj2_S);
1314         const SimdReal invdr2_S  = invsqrt(dr2_S);
1315         const SimdReal dr_S      = dr2_S*invdr2_S;
1316
1317         constexpr real min_one_plus_eps = -1.0 + 2.0*GMX_REAL_EPS; // Smallest number > -1
1318
1319         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1320          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1321          * This also ensures that rounding errors would cause the argument
1322          * of simdAcos to be < -1.
1323          * Note that we do not take precautions for cos(0)=1, so the bonds
1324          * in an angle should not align at an angle of 0 degrees.
1325          */
1326         const SimdReal cos_S     = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1327
1328         const SimdReal theta_S   = acos(cos_S);
1329         const SimdReal invsin_S  = invsqrt( 1.0 - cos_S * cos_S );
1330         const SimdReal st_S      = ktheta_S * (theta0_S - theta_S) * invsin_S;
1331         const SimdReal sth_S     = st_S * cos_S;
1332
1333         const SimdReal cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1334         const SimdReal cii_S     = sth_S * nrij_1_S * nrij_1_S;
1335         const SimdReal ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1336
1337         const SimdReal sUB_S     = kUB_S * (r13_S - dr_S) * invdr2_S;
1338
1339         const SimdReal f_ikx_S   = sUB_S * rikx_S;
1340         const SimdReal f_iky_S   = sUB_S * riky_S;
1341         const SimdReal f_ikz_S   = sUB_S * rikz_S;
1342
1343         const SimdReal f_ix_S    = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1344         const SimdReal f_iy_S    = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1345         const SimdReal f_iz_S    = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1346         const SimdReal f_kx_S    = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1347         const SimdReal f_ky_S    = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1348         const SimdReal f_kz_S    = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1349
1350         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1351         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1352         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1353     }
1354 }
1355
1356 #endif // GMX_SIMD_HAVE_REAL
1357
1358 real quartic_angles(int nbonds,
1359                     const t_iatom forceatoms[], const t_iparams forceparams[],
1360                     const rvec x[], rvec4 f[], rvec fshift[],
1361                     const t_pbc *pbc, const t_graph *g,
1362                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1363                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1364                     int gmx_unused *global_atom_index)
1365 {
1366     int  i, j, ai, aj, ak, t1, t2, type;
1367     rvec r_ij, r_kj;
1368     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1369     ivec jt, dt_ij, dt_kj;
1370
1371     vtot = 0.0;
1372     for (i = 0; (i < nbonds); )
1373     {
1374         type = forceatoms[i++];
1375         ai   = forceatoms[i++];
1376         aj   = forceatoms[i++];
1377         ak   = forceatoms[i++];
1378
1379         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1380                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1381
1382         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1383
1384         dVdt = 0;
1385         va   = forceparams[type].qangle.c[0];
1386         dtp  = 1.0;
1387         for (j = 1; j <= 4; j++)
1388         {
1389             c     = forceparams[type].qangle.c[j];
1390             dVdt -= j*c*dtp;
1391             dtp  *= dt;
1392             va   += c*dtp;
1393         }
1394         /* 20 */
1395
1396         vtot += va;
1397
1398         cos_theta2 = gmx::square(cos_theta);            /*   1          */
1399         if (cos_theta2 < 1)
1400         {
1401             int  m;
1402             real st, sth;
1403             real cik, cii, ckk;
1404             real nrkj2, nrij2;
1405             rvec f_i, f_j, f_k;
1406
1407             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1408             sth   = st*cos_theta;                      /*   1           */
1409             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1410             nrij2 = iprod(r_ij, r_ij);
1411
1412             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1413             cii = sth/nrij2;                    /*  10          */
1414             ckk = sth/nrkj2;                    /*  10          */
1415
1416             for (m = 0; (m < DIM); m++)         /*  39          */
1417             {
1418                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1419                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1420                 f_j[m]    = -f_i[m]-f_k[m];
1421                 f[ai][m] += f_i[m];
1422                 f[aj][m] += f_j[m];
1423                 f[ak][m] += f_k[m];
1424             }
1425             if (g)
1426             {
1427                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1428
1429                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1430                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1431                 t1 = IVEC2IS(dt_ij);
1432                 t2 = IVEC2IS(dt_kj);
1433             }
1434             rvec_inc(fshift[t1], f_i);
1435             rvec_inc(fshift[CENTRAL], f_j);
1436             rvec_inc(fshift[t2], f_k);
1437         }                                       /* 153 TOTAL    */
1438     }
1439     return vtot;
1440 }
1441
1442 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1443                const t_pbc *pbc,
1444                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1445                int *t1, int *t2, int *t3)
1446 {
1447     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
1448     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
1449     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
1450
1451     cprod(r_ij, r_kj, m);                  /*  9        */
1452     cprod(r_kj, r_kl, n);                  /*  9                */
1453     real phi  = gmx_angle(m, n);           /* 49 (assuming 25 for atan2) */
1454     real ipr  = iprod(r_ij, n);            /*  5        */
1455     real sign = (ipr < 0.0) ? -1.0 : 1.0;
1456     phi       = sign*phi;                  /*  1                */
1457     /* 82 TOTAL */
1458     return phi;
1459 }
1460
1461
1462 #if GMX_SIMD_HAVE_REAL
1463
1464 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1465  * also calculates the pre-factor required for the dihedral force update.
1466  * Note that bv and buf should be register aligned.
1467  */
1468 static inline void
1469 dih_angle_simd(const rvec *x,
1470                const int *ai, const int *aj, const int *ak, const int *al,
1471                const real *pbc_simd,
1472                SimdReal *phi_S,
1473                SimdReal *mx_S, SimdReal *my_S, SimdReal *mz_S,
1474                SimdReal *nx_S, SimdReal *ny_S, SimdReal *nz_S,
1475                SimdReal *nrkj_m2_S,
1476                SimdReal *nrkj_n2_S,
1477                SimdReal *p_S,
1478                SimdReal *q_S)
1479 {
1480     SimdReal xi_S, yi_S, zi_S;
1481     SimdReal xj_S, yj_S, zj_S;
1482     SimdReal xk_S, yk_S, zk_S;
1483     SimdReal xl_S, yl_S, zl_S;
1484     SimdReal rijx_S, rijy_S, rijz_S;
1485     SimdReal rkjx_S, rkjy_S, rkjz_S;
1486     SimdReal rklx_S, rkly_S, rklz_S;
1487     SimdReal cx_S, cy_S, cz_S;
1488     SimdReal cn_S;
1489     SimdReal s_S;
1490     SimdReal ipr_S;
1491     SimdReal iprm_S, iprn_S;
1492     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1493     SimdReal toler_S;
1494     SimdReal nrkj2_min_S;
1495     SimdReal real_eps_S;
1496
1497     /* Used to avoid division by zero.
1498      * We take into acount that we multiply the result by real_eps_S.
1499      */
1500     nrkj2_min_S = SimdReal(GMX_REAL_MIN/(2*GMX_REAL_EPS));
1501
1502     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1503     real_eps_S  = SimdReal(2*GMX_REAL_EPS);
1504
1505     /* Store the non PBC corrected distances packed and aligned */
1506     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1507     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1508     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1509     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), al, &xl_S, &yl_S, &zl_S);
1510     rijx_S = xi_S - xj_S;
1511     rijy_S = yi_S - yj_S;
1512     rijz_S = zi_S - zj_S;
1513     rkjx_S = xk_S - xj_S;
1514     rkjy_S = yk_S - yj_S;
1515     rkjz_S = zk_S - zj_S;
1516     rklx_S = xk_S - xl_S;
1517     rkly_S = yk_S - yl_S;
1518     rklz_S = zk_S - zl_S;
1519
1520     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1521     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1522     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1523
1524     cprod(rijx_S, rijy_S, rijz_S,
1525           rkjx_S, rkjy_S, rkjz_S,
1526           mx_S, my_S, mz_S);
1527
1528     cprod(rkjx_S, rkjy_S, rkjz_S,
1529           rklx_S, rkly_S, rklz_S,
1530           nx_S, ny_S, nz_S);
1531
1532     cprod(*mx_S, *my_S, *mz_S,
1533           *nx_S, *ny_S, *nz_S,
1534           &cx_S, &cy_S, &cz_S);
1535
1536     cn_S       = sqrt(norm2(cx_S, cy_S, cz_S));
1537
1538     s_S        = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1539
1540     /* Determine the dihedral angle, the sign might need correction */
1541     *phi_S     = atan2(cn_S, s_S);
1542
1543     ipr_S      = iprod(rijx_S, rijy_S, rijz_S,
1544                        *nx_S, *ny_S, *nz_S);
1545
1546     iprm_S     = norm2(*mx_S, *my_S, *mz_S);
1547     iprn_S     = norm2(*nx_S, *ny_S, *nz_S);
1548
1549     nrkj2_S    = norm2(rkjx_S, rkjy_S, rkjz_S);
1550
1551     /* Avoid division by zero. When zero, the result is multiplied by 0
1552      * anyhow, so the 3 max below do not affect the final result.
1553      */
1554     nrkj2_S    = max(nrkj2_S, nrkj2_min_S);
1555     nrkj_1_S   = invsqrt(nrkj2_S);
1556     nrkj_2_S   = nrkj_1_S * nrkj_1_S;
1557     nrkj_S     = nrkj2_S * nrkj_1_S;
1558
1559     toler_S    = nrkj2_S * real_eps_S;
1560
1561     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1562      * So we take a max with the tolerance instead. Since we multiply with
1563      * m or n later, the max does not affect the results.
1564      */
1565     iprm_S     = max(iprm_S, toler_S);
1566     iprn_S     = max(iprn_S, toler_S);
1567     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1568     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1569
1570     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1571     *phi_S     = copysign(*phi_S, ipr_S);
1572     *p_S       = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1573     *p_S       = *p_S * nrkj_2_S;
1574
1575     *q_S       = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1576     *q_S       = *q_S * nrkj_2_S;
1577 }
1578
1579 #endif // GMX_SIMD_HAVE_REAL
1580
1581 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1582                 rvec r_ij, rvec r_kj, rvec r_kl,
1583                 rvec m, rvec n, rvec4 f[], rvec fshift[],
1584                 const t_pbc *pbc, const t_graph *g,
1585                 const rvec x[], int t1, int t2, int t3)
1586 {
1587     /* 143 FLOPS */
1588     rvec f_i, f_j, f_k, f_l;
1589     rvec uvec, vvec, svec, dx_jl;
1590     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1591     real a, b, p, q, toler;
1592     ivec jt, dt_ij, dt_kj, dt_lj;
1593
1594     iprm  = iprod(m, m);       /*  5    */
1595     iprn  = iprod(n, n);       /*  5    */
1596     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1597     toler = nrkj2*GMX_REAL_EPS;
1598     if ((iprm > toler) && (iprn > toler))
1599     {
1600         nrkj_1 = gmx::invsqrt(nrkj2); /* 10     */
1601         nrkj_2 = nrkj_1*nrkj_1;       /*  1     */
1602         nrkj   = nrkj2*nrkj_1;        /*  1     */
1603         a      = -ddphi*nrkj/iprm;    /* 11     */
1604         svmul(a, m, f_i);             /*  3     */
1605         b     = ddphi*nrkj/iprn;      /* 11     */
1606         svmul(b, n, f_l);             /*  3  */
1607         p     = iprod(r_ij, r_kj);    /*  5     */
1608         p    *= nrkj_2;               /*  1     */
1609         q     = iprod(r_kl, r_kj);    /*  5     */
1610         q    *= nrkj_2;               /*  1     */
1611         svmul(p, f_i, uvec);          /*  3     */
1612         svmul(q, f_l, vvec);          /*  3     */
1613         rvec_sub(uvec, vvec, svec);   /*  3     */
1614         rvec_sub(f_i, svec, f_j);     /*  3     */
1615         rvec_add(f_l, svec, f_k);     /*  3     */
1616         rvec_inc(f[i], f_i);          /*  3     */
1617         rvec_dec(f[j], f_j);          /*  3     */
1618         rvec_dec(f[k], f_k);          /*  3     */
1619         rvec_inc(f[l], f_l);          /*  3     */
1620
1621         if (g)
1622         {
1623             copy_ivec(SHIFT_IVEC(g, j), jt);
1624             ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1625             ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1626             ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1627             t1 = IVEC2IS(dt_ij);
1628             t2 = IVEC2IS(dt_kj);
1629             t3 = IVEC2IS(dt_lj);
1630         }
1631         else if (pbc)
1632         {
1633             t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1634         }
1635         else
1636         {
1637             t3 = CENTRAL;
1638         }
1639
1640         rvec_inc(fshift[t1], f_i);
1641         rvec_dec(fshift[CENTRAL], f_j);
1642         rvec_dec(fshift[t2], f_k);
1643         rvec_inc(fshift[t3], f_l);
1644     }
1645     /* 112 TOTAL    */
1646 }
1647
1648 /* As do_dih_fup above, but without shift forces */
1649 static void
1650 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1651                     rvec r_ij, rvec r_kj, rvec r_kl,
1652                     rvec m, rvec n, rvec4 f[])
1653 {
1654     rvec f_i, f_j, f_k, f_l;
1655     rvec uvec, vvec, svec;
1656     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1657     real a, b, p, q, toler;
1658
1659     iprm  = iprod(m, m);       /*  5    */
1660     iprn  = iprod(n, n);       /*  5    */
1661     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1662     toler = nrkj2*GMX_REAL_EPS;
1663     if ((iprm > toler) && (iprn > toler))
1664     {
1665         nrkj_1 = gmx::invsqrt(nrkj2); /* 10     */
1666         nrkj_2 = nrkj_1*nrkj_1;       /*  1     */
1667         nrkj   = nrkj2*nrkj_1;        /*  1     */
1668         a      = -ddphi*nrkj/iprm;    /* 11     */
1669         svmul(a, m, f_i);             /*  3     */
1670         b     = ddphi*nrkj/iprn;      /* 11     */
1671         svmul(b, n, f_l);             /*  3  */
1672         p     = iprod(r_ij, r_kj);    /*  5     */
1673         p    *= nrkj_2;               /*  1     */
1674         q     = iprod(r_kl, r_kj);    /*  5     */
1675         q    *= nrkj_2;               /*  1     */
1676         svmul(p, f_i, uvec);          /*  3     */
1677         svmul(q, f_l, vvec);          /*  3     */
1678         rvec_sub(uvec, vvec, svec);   /*  3     */
1679         rvec_sub(f_i, svec, f_j);     /*  3     */
1680         rvec_add(f_l, svec, f_k);     /*  3     */
1681         rvec_inc(f[i], f_i);          /*  3     */
1682         rvec_dec(f[j], f_j);          /*  3     */
1683         rvec_dec(f[k], f_k);          /*  3     */
1684         rvec_inc(f[l], f_l);          /*  3     */
1685     }
1686 }
1687
1688 #if GMX_SIMD_HAVE_REAL
1689 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1690 static inline void gmx_simdcall
1691 do_dih_fup_noshiftf_simd(const int *ai, const int *aj, const int *ak, const int *al,
1692                          SimdReal p, SimdReal q,
1693                          SimdReal f_i_x,  SimdReal f_i_y,  SimdReal f_i_z,
1694                          SimdReal mf_l_x, SimdReal mf_l_y, SimdReal mf_l_z,
1695                          rvec4 f[])
1696 {
1697     SimdReal sx    = p * f_i_x + q * mf_l_x;
1698     SimdReal sy    = p * f_i_y + q * mf_l_y;
1699     SimdReal sz    = p * f_i_z + q * mf_l_z;
1700     SimdReal f_j_x = f_i_x - sx;
1701     SimdReal f_j_y = f_i_y - sy;
1702     SimdReal f_j_z = f_i_z - sz;
1703     SimdReal f_k_x = mf_l_x - sx;
1704     SimdReal f_k_y = mf_l_y - sy;
1705     SimdReal f_k_z = mf_l_z - sz;
1706     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_i_x, f_i_y, f_i_z);
1707     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_j_x, f_j_y, f_j_z);
1708     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_k_x, f_k_y, f_k_z);
1709     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), al, mf_l_x, mf_l_y, mf_l_z);
1710 }
1711 #endif // GMX_SIMD_HAVE_REAL
1712
1713 static real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1714                     real phi, real lambda, real *V, real *F)
1715 {
1716     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1717     real L1   = 1.0 - lambda;
1718     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1719     real dph0 = (phiB - phiA)*DEG2RAD;
1720     real cp   = L1*cpA + lambda*cpB;
1721
1722     mdphi =  mult*phi - ph0;
1723     sdphi = std::sin(mdphi);
1724     ddphi = -cp*mult*sdphi;
1725     v1    = 1.0 + std::cos(mdphi);
1726     v     = cp*v1;
1727
1728     dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1729
1730     *V = v;
1731     *F = ddphi;
1732
1733     return dvdlambda;
1734
1735     /* That was 40 flops */
1736 }
1737
1738 static void
1739 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1740                real phi, real lambda, real *F)
1741 {
1742     real mdphi, sdphi, ddphi;
1743     real L1   = 1.0 - lambda;
1744     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1745     real cp   = L1*cpA + lambda*cpB;
1746
1747     mdphi = mult*phi - ph0;
1748     sdphi = std::sin(mdphi);
1749     ddphi = -cp*mult*sdphi;
1750
1751     *F = ddphi;
1752
1753     /* That was 20 flops */
1754 }
1755
1756 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1757                         real phi, real lambda, real *V, real *F)
1758 /* similar to dopdihs, except for a minus sign  *
1759  * and a different treatment of mult/phi0       */
1760 {
1761     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1762     real L1   = 1.0 - lambda;
1763     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1764     real dph0 = (phiB - phiA)*DEG2RAD;
1765     real cp   = L1*cpA + lambda*cpB;
1766
1767     mdphi = mult*(phi-ph0);
1768     sdphi = std::sin(mdphi);
1769     ddphi = cp*mult*sdphi;
1770     v1    = 1.0-std::cos(mdphi);
1771     v     = cp*v1;
1772
1773     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1774
1775     *V = v;
1776     *F = ddphi;
1777
1778     return dvdlambda;
1779
1780     /* That was 40 flops */
1781 }
1782
1783 real pdihs(int nbonds,
1784            const t_iatom forceatoms[], const t_iparams forceparams[],
1785            const rvec x[], rvec4 f[], rvec fshift[],
1786            const t_pbc *pbc, const t_graph *g,
1787            real lambda, real *dvdlambda,
1788            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1789            int gmx_unused *global_atom_index)
1790 {
1791     int  i, type, ai, aj, ak, al;
1792     int  t1, t2, t3;
1793     rvec r_ij, r_kj, r_kl, m, n;
1794     real phi, ddphi, vpd, vtot;
1795
1796     vtot = 0.0;
1797
1798     for (i = 0; (i < nbonds); )
1799     {
1800         type = forceatoms[i++];
1801         ai   = forceatoms[i++];
1802         aj   = forceatoms[i++];
1803         ak   = forceatoms[i++];
1804         al   = forceatoms[i++];
1805
1806         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1807                         &t1, &t2, &t3);  /*  84      */
1808         *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1809                               forceparams[type].pdihs.cpB,
1810                               forceparams[type].pdihs.phiA,
1811                               forceparams[type].pdihs.phiB,
1812                               forceparams[type].pdihs.mult,
1813                               phi, lambda, &vpd, &ddphi);
1814
1815         vtot += vpd;
1816         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1817                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1818
1819     }                                                 /* 223 TOTAL  */
1820
1821     return vtot;
1822 }
1823
1824 void make_dp_periodic(real *dp)  /* 1 flop? */
1825 {
1826     /* dp cannot be outside (-pi,pi) */
1827     if (*dp >= M_PI)
1828     {
1829         *dp -= 2*M_PI;
1830     }
1831     else if (*dp < -M_PI)
1832     {
1833         *dp += 2*M_PI;
1834     }
1835     return;
1836 }
1837
1838 /* As pdihs above, but without calculating energies and shift forces */
1839 void
1840 pdihs_noener(int nbonds,
1841              const t_iatom forceatoms[], const t_iparams forceparams[],
1842              const rvec x[], rvec4 f[],
1843              const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
1844              real lambda,
1845              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1846              int gmx_unused *global_atom_index)
1847 {
1848     int  i, type, ai, aj, ak, al;
1849     int  t1, t2, t3;
1850     rvec r_ij, r_kj, r_kl, m, n;
1851     real phi, ddphi_tot, ddphi;
1852
1853     for (i = 0; (i < nbonds); )
1854     {
1855         ai   = forceatoms[i+1];
1856         aj   = forceatoms[i+2];
1857         ak   = forceatoms[i+3];
1858         al   = forceatoms[i+4];
1859
1860         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1861                         &t1, &t2, &t3);
1862
1863         ddphi_tot = 0;
1864
1865         /* Loop over dihedrals working on the same atoms,
1866          * so we avoid recalculating angles and force distributions.
1867          */
1868         do
1869         {
1870             type = forceatoms[i];
1871             dopdihs_noener(forceparams[type].pdihs.cpA,
1872                            forceparams[type].pdihs.cpB,
1873                            forceparams[type].pdihs.phiA,
1874                            forceparams[type].pdihs.phiB,
1875                            forceparams[type].pdihs.mult,
1876                            phi, lambda, &ddphi);
1877             ddphi_tot += ddphi;
1878
1879             i += 5;
1880         }
1881         while (i < nbonds &&
1882                forceatoms[i+1] == ai &&
1883                forceatoms[i+2] == aj &&
1884                forceatoms[i+3] == ak &&
1885                forceatoms[i+4] == al);
1886
1887         do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1888     }
1889 }
1890
1891
1892 #if GMX_SIMD_HAVE_REAL
1893
1894 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1895 void
1896 pdihs_noener_simd(int nbonds,
1897                   const t_iatom forceatoms[], const t_iparams forceparams[],
1898                   const rvec x[], rvec4 f[],
1899                   const t_pbc *pbc, const t_graph gmx_unused *g,
1900                   real gmx_unused lambda,
1901                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1902                   int gmx_unused *global_atom_index)
1903 {
1904     const int             nfa1 = 5;
1905     int                   i, iu, s;
1906     int                   type;
1907     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1908     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1909     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1910     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    al[GMX_SIMD_REAL_WIDTH];
1911     alignas(GMX_SIMD_ALIGNMENT) real            buf[3*GMX_SIMD_REAL_WIDTH];
1912     real                 *cp, *phi0, *mult;
1913     SimdReal              deg2rad_S(DEG2RAD);
1914     SimdReal              p_S, q_S;
1915     SimdReal              phi0_S, phi_S;
1916     SimdReal              mx_S, my_S, mz_S;
1917     SimdReal              nx_S, ny_S, nz_S;
1918     SimdReal              nrkj_m2_S, nrkj_n2_S;
1919     SimdReal              cp_S, mdphi_S, mult_S;
1920     SimdReal              sin_S, cos_S;
1921     SimdReal              mddphi_S;
1922     SimdReal              sf_i_S, msf_l_S;
1923     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1924
1925     /* Extract aligned pointer for parameters and variables */
1926     cp    = buf + 0*GMX_SIMD_REAL_WIDTH;
1927     phi0  = buf + 1*GMX_SIMD_REAL_WIDTH;
1928     mult  = buf + 2*GMX_SIMD_REAL_WIDTH;
1929
1930     set_pbc_simd(pbc, pbc_simd);
1931
1932     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1933     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1934     {
1935         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1936          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1937          */
1938         iu = i;
1939         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1940         {
1941             type  = forceatoms[iu];
1942             ai[s] = forceatoms[iu+1];
1943             aj[s] = forceatoms[iu+2];
1944             ak[s] = forceatoms[iu+3];
1945             al[s] = forceatoms[iu+4];
1946
1947             /* At the end fill the arrays with the last atoms and 0 params */
1948             if (i + s*nfa1 < nbonds)
1949             {
1950                 cp[s]   = forceparams[type].pdihs.cpA;
1951                 phi0[s] = forceparams[type].pdihs.phiA;
1952                 mult[s] = forceparams[type].pdihs.mult;
1953
1954                 if (iu + nfa1 < nbonds)
1955                 {
1956                     iu += nfa1;
1957                 }
1958             }
1959             else
1960             {
1961                 cp[s]   = 0;
1962                 phi0[s] = 0;
1963                 mult[s] = 0;
1964             }
1965         }
1966
1967         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
1968         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
1969                        &phi_S,
1970                        &mx_S, &my_S, &mz_S,
1971                        &nx_S, &ny_S, &nz_S,
1972                        &nrkj_m2_S,
1973                        &nrkj_n2_S,
1974                        &p_S, &q_S);
1975
1976         cp_S     = load<SimdReal>(cp);
1977         phi0_S   = load<SimdReal>(phi0) * deg2rad_S;
1978         mult_S   = load<SimdReal>(mult);
1979
1980         mdphi_S  = fms(mult_S, phi_S, phi0_S);
1981
1982         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
1983         sincos(mdphi_S, &sin_S, &cos_S);
1984         mddphi_S = cp_S * mult_S * sin_S;
1985         sf_i_S   = mddphi_S * nrkj_m2_S;
1986         msf_l_S  = mddphi_S * nrkj_n2_S;
1987
1988         /* After this m?_S will contain f[i] */
1989         mx_S     = sf_i_S * mx_S;
1990         my_S     = sf_i_S * my_S;
1991         mz_S     = sf_i_S * mz_S;
1992
1993         /* After this m?_S will contain -f[l] */
1994         nx_S     = msf_l_S * nx_S;
1995         ny_S     = msf_l_S * ny_S;
1996         nz_S     = msf_l_S * nz_S;
1997
1998         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
1999                                  p_S, q_S,
2000                                  mx_S, my_S, mz_S,
2001                                  nx_S, ny_S, nz_S,
2002                                  f);
2003     }
2004 }
2005
2006 /* This is mostly a copy of pdihs_noener_simd above, but with using
2007  * the RB potential instead of a harmonic potential.
2008  * This function can replace rbdihs() when no energy and virial are needed.
2009  */
2010 void
2011 rbdihs_noener_simd(int nbonds,
2012                    const t_iatom forceatoms[], const t_iparams forceparams[],
2013                    const rvec x[], rvec4 f[],
2014                    const t_pbc *pbc, const t_graph gmx_unused *g,
2015                    real gmx_unused lambda,
2016                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2017                    int gmx_unused *global_atom_index)
2018 {
2019     const int             nfa1 = 5;
2020     int                   i, iu, s, j;
2021     int                   type;
2022     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ai[GMX_SIMD_REAL_WIDTH];
2023     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  aj[GMX_SIMD_REAL_WIDTH];
2024     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ak[GMX_SIMD_REAL_WIDTH];
2025     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  al[GMX_SIMD_REAL_WIDTH];
2026     alignas(GMX_SIMD_ALIGNMENT) real          parm[NR_RBDIHS*GMX_SIMD_REAL_WIDTH];
2027
2028     SimdReal              p_S, q_S;
2029     SimdReal              phi_S;
2030     SimdReal              ddphi_S, cosfac_S;
2031     SimdReal              mx_S, my_S, mz_S;
2032     SimdReal              nx_S, ny_S, nz_S;
2033     SimdReal              nrkj_m2_S, nrkj_n2_S;
2034     SimdReal              parm_S, c_S;
2035     SimdReal              sin_S, cos_S;
2036     SimdReal              sf_i_S, msf_l_S;
2037     alignas(GMX_SIMD_ALIGNMENT) real          pbc_simd[9*GMX_SIMD_REAL_WIDTH];
2038
2039     SimdReal              pi_S(M_PI);
2040     SimdReal              one_S(1.0);
2041
2042     set_pbc_simd(pbc, pbc_simd);
2043
2044     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2045     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
2046     {
2047         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2048          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2049          */
2050         iu = i;
2051         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2052         {
2053             type  = forceatoms[iu];
2054             ai[s] = forceatoms[iu+1];
2055             aj[s] = forceatoms[iu+2];
2056             ak[s] = forceatoms[iu+3];
2057             al[s] = forceatoms[iu+4];
2058
2059             /* At the end fill the arrays with the last atoms and 0 params */
2060             if (i + s*nfa1 < nbonds)
2061             {
2062                 /* We don't need the first parameter, since that's a constant
2063                  * which only affects the energies, not the forces.
2064                  */
2065                 for (j = 1; j < NR_RBDIHS; j++)
2066                 {
2067                     parm[j*GMX_SIMD_REAL_WIDTH + s] =
2068                         forceparams[type].rbdihs.rbcA[j];
2069                 }
2070
2071                 if (iu + nfa1 < nbonds)
2072                 {
2073                     iu += nfa1;
2074                 }
2075             }
2076             else
2077             {
2078                 for (j = 1; j < NR_RBDIHS; j++)
2079                 {
2080                     parm[j*GMX_SIMD_REAL_WIDTH + s] = 0;
2081                 }
2082             }
2083         }
2084
2085         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2086         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
2087                        &phi_S,
2088                        &mx_S, &my_S, &mz_S,
2089                        &nx_S, &ny_S, &nz_S,
2090                        &nrkj_m2_S,
2091                        &nrkj_n2_S,
2092                        &p_S, &q_S);
2093
2094         /* Change to polymer convention */
2095         phi_S = phi_S - pi_S;
2096
2097         sincos(phi_S, &sin_S, &cos_S);
2098
2099         ddphi_S   = setZero();
2100         c_S       = one_S;
2101         cosfac_S  = one_S;
2102         for (j = 1; j < NR_RBDIHS; j++)
2103         {
2104             parm_S   = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
2105             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2106             cosfac_S = cosfac_S * cos_S;
2107             c_S      = c_S + one_S;
2108         }
2109
2110         /* Note that here we do not use the minus sign which is present
2111          * in the normal RB code. This is corrected for through (m)sf below.
2112          */
2113         ddphi_S  = ddphi_S * sin_S;
2114
2115         sf_i_S   = ddphi_S * nrkj_m2_S;
2116         msf_l_S  = ddphi_S * nrkj_n2_S;
2117
2118         /* After this m?_S will contain f[i] */
2119         mx_S     = sf_i_S * mx_S;
2120         my_S     = sf_i_S * my_S;
2121         mz_S     = sf_i_S * mz_S;
2122
2123         /* After this m?_S will contain -f[l] */
2124         nx_S     = msf_l_S * nx_S;
2125         ny_S     = msf_l_S * ny_S;
2126         nz_S     = msf_l_S * nz_S;
2127
2128         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
2129                                  p_S, q_S,
2130                                  mx_S, my_S, mz_S,
2131                                  nx_S, ny_S, nz_S,
2132                                  f);
2133     }
2134 }
2135
2136 #endif // GMX_SIMD_HAVE_REAL
2137
2138
2139 real idihs(int nbonds,
2140            const t_iatom forceatoms[], const t_iparams forceparams[],
2141            const rvec x[], rvec4 f[], rvec fshift[],
2142            const t_pbc *pbc, const t_graph *g,
2143            real lambda, real *dvdlambda,
2144            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2145            int gmx_unused *global_atom_index)
2146 {
2147     int  i, type, ai, aj, ak, al;
2148     int  t1, t2, t3;
2149     real phi, phi0, dphi0, ddphi, vtot;
2150     rvec r_ij, r_kj, r_kl, m, n;
2151     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2152
2153     L1        = 1.0-lambda;
2154     dvdl_term = 0;
2155     vtot      = 0.0;
2156     for (i = 0; (i < nbonds); )
2157     {
2158         type = forceatoms[i++];
2159         ai   = forceatoms[i++];
2160         aj   = forceatoms[i++];
2161         ak   = forceatoms[i++];
2162         al   = forceatoms[i++];
2163
2164         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2165                         &t1, &t2, &t3);  /*  84         */
2166
2167         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2168          * force changes if we just apply a normal harmonic.
2169          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2170          * This means we will never have the periodicity problem, unless
2171          * the dihedral is Pi away from phiO, which is very unlikely due to
2172          * the potential.
2173          */
2174         kA = forceparams[type].harmonic.krA;
2175         kB = forceparams[type].harmonic.krB;
2176         pA = forceparams[type].harmonic.rA;
2177         pB = forceparams[type].harmonic.rB;
2178
2179         kk    = L1*kA + lambda*kB;
2180         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2181         dphi0 = (pB - pA)*DEG2RAD;
2182
2183         dp = phi-phi0;
2184
2185         make_dp_periodic(&dp);
2186
2187         dp2 = dp*dp;
2188
2189         vtot += 0.5*kk*dp2;
2190         ddphi = -kk*dp;
2191
2192         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2193
2194         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
2195                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2196         /* 218 TOTAL    */
2197     }
2198
2199     *dvdlambda += dvdl_term;
2200     return vtot;
2201 }
2202
2203 static real low_angres(int nbonds,
2204                        const t_iatom forceatoms[], const t_iparams forceparams[],
2205                        const rvec x[], rvec4 f[], rvec fshift[],
2206                        const t_pbc *pbc, const t_graph *g,
2207                        real lambda, real *dvdlambda,
2208                        gmx_bool bZAxis)
2209 {
2210     int  i, m, type, ai, aj, ak, al;
2211     int  t1, t2;
2212     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2213     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2214     real st, sth, nrij2, nrkl2, c, cij, ckl;
2215
2216     ivec dt;
2217     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2218
2219     vtot = 0.0;
2220     ak   = al = 0; /* to avoid warnings */
2221     for (i = 0; i < nbonds; )
2222     {
2223         type = forceatoms[i++];
2224         ai   = forceatoms[i++];
2225         aj   = forceatoms[i++];
2226         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2227         if (!bZAxis)
2228         {
2229             ak   = forceatoms[i++];
2230             al   = forceatoms[i++];
2231             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2232         }
2233         else
2234         {
2235             r_kl[XX] = 0;
2236             r_kl[YY] = 0;
2237             r_kl[ZZ] = 1;
2238         }
2239
2240         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2241         phi     = std::acos(cos_phi);    /* 10           */
2242
2243         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2244                                   forceparams[type].pdihs.cpB,
2245                                   forceparams[type].pdihs.phiA,
2246                                   forceparams[type].pdihs.phiB,
2247                                   forceparams[type].pdihs.mult,
2248                                   phi, lambda, &vid, &dVdphi); /*  40  */
2249
2250         vtot += vid;
2251
2252         cos_phi2 = gmx::square(cos_phi);                /*   1          */
2253         if (cos_phi2 < 1)
2254         {
2255             st    = -dVdphi*gmx::invsqrt(1 - cos_phi2); /*  12          */
2256             sth   = st*cos_phi;                         /*   1          */
2257             nrij2 = iprod(r_ij, r_ij);                  /*   5          */
2258             nrkl2 = iprod(r_kl, r_kl);                  /*   5          */
2259
2260             c   = st*gmx::invsqrt(nrij2*nrkl2);         /*  11          */
2261             cij = sth/nrij2;                            /*  10          */
2262             ckl = sth/nrkl2;                            /*  10          */
2263
2264             for (m = 0; m < DIM; m++)                   /*  18+18       */
2265             {
2266                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2267                 f[ai][m] += f_i[m];
2268                 f[aj][m] -= f_i[m];
2269                 if (!bZAxis)
2270                 {
2271                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2272                     f[ak][m] += f_k[m];
2273                     f[al][m] -= f_k[m];
2274                 }
2275             }
2276
2277             if (g)
2278             {
2279                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2280                 t1 = IVEC2IS(dt);
2281             }
2282             rvec_inc(fshift[t1], f_i);
2283             rvec_dec(fshift[CENTRAL], f_i);
2284             if (!bZAxis)
2285             {
2286                 if (g)
2287                 {
2288                     ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2289                     t2 = IVEC2IS(dt);
2290                 }
2291                 rvec_inc(fshift[t2], f_k);
2292                 rvec_dec(fshift[CENTRAL], f_k);
2293             }
2294         }
2295     }
2296
2297     return vtot; /*  184 / 157 (bZAxis)  total  */
2298 }
2299
2300 real angres(int nbonds,
2301             const t_iatom forceatoms[], const t_iparams forceparams[],
2302             const rvec x[], rvec4 f[], rvec fshift[],
2303             const t_pbc *pbc, const t_graph *g,
2304             real lambda, real *dvdlambda,
2305             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2306             int gmx_unused *global_atom_index)
2307 {
2308     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2309                       lambda, dvdlambda, FALSE);
2310 }
2311
2312 real angresz(int nbonds,
2313              const t_iatom forceatoms[], const t_iparams forceparams[],
2314              const rvec x[], rvec4 f[], rvec fshift[],
2315              const t_pbc *pbc, const t_graph *g,
2316              real lambda, real *dvdlambda,
2317              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2318              int gmx_unused *global_atom_index)
2319 {
2320     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2321                       lambda, dvdlambda, TRUE);
2322 }
2323
2324 real dihres(int nbonds,
2325             const t_iatom forceatoms[], const t_iparams forceparams[],
2326             const rvec x[], rvec4 f[], rvec fshift[],
2327             const t_pbc *pbc, const t_graph *g,
2328             real lambda, real *dvdlambda,
2329             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2330             int gmx_unused  *global_atom_index)
2331 {
2332     real vtot = 0;
2333     int  ai, aj, ak, al, i, type, t1, t2, t3;
2334     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2335     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2336     rvec r_ij, r_kj, r_kl, m, n;
2337
2338     L1 = 1.0-lambda;
2339
2340     d2r = DEG2RAD;
2341
2342     for (i = 0; (i < nbonds); )
2343     {
2344         type = forceatoms[i++];
2345         ai   = forceatoms[i++];
2346         aj   = forceatoms[i++];
2347         ak   = forceatoms[i++];
2348         al   = forceatoms[i++];
2349
2350         phi0A  = forceparams[type].dihres.phiA*d2r;
2351         dphiA  = forceparams[type].dihres.dphiA*d2r;
2352         kfacA  = forceparams[type].dihres.kfacA;
2353
2354         phi0B  = forceparams[type].dihres.phiB*d2r;
2355         dphiB  = forceparams[type].dihres.dphiB*d2r;
2356         kfacB  = forceparams[type].dihres.kfacB;
2357
2358         phi0  = L1*phi0A + lambda*phi0B;
2359         dphi  = L1*dphiA + lambda*dphiB;
2360         kfac  = L1*kfacA + lambda*kfacB;
2361
2362         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2363                         &t1, &t2, &t3);
2364         /* 84 flops */
2365
2366         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2367          * force changes if we just apply a normal harmonic.
2368          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2369          * This means we will never have the periodicity problem, unless
2370          * the dihedral is Pi away from phiO, which is very unlikely due to
2371          * the potential.
2372          */
2373         dp = phi-phi0;
2374         make_dp_periodic(&dp);
2375
2376         if (dp > dphi)
2377         {
2378             ddp = dp-dphi;
2379         }
2380         else if (dp < -dphi)
2381         {
2382             ddp = dp+dphi;
2383         }
2384         else
2385         {
2386             ddp = 0;
2387         }
2388
2389         if (ddp != 0.0)
2390         {
2391             ddp2  = ddp*ddp;
2392             vtot += 0.5*kfac*ddp2;
2393             ddphi = kfac*ddp;
2394
2395             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2396             /* lambda dependence from changing restraint distances */
2397             if (ddp > 0)
2398             {
2399                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2400             }
2401             else if (ddp < 0)
2402             {
2403                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2404             }
2405             do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2406                        f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2407         }
2408     }
2409     return vtot;
2410 }
2411
2412
2413 real unimplemented(int gmx_unused nbonds,
2414                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2415                    const rvec gmx_unused x[], rvec4 gmx_unused f[], rvec gmx_unused fshift[],
2416                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2417                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2418                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2419                    int gmx_unused *global_atom_index)
2420 {
2421     gmx_impl("*** you are using a not implemented function");
2422
2423     return 0.0; /* To make the compiler happy */
2424 }
2425
2426 real restrangles(int nbonds,
2427                  const t_iatom forceatoms[], const t_iparams forceparams[],
2428                  const rvec x[], rvec4 f[], rvec fshift[],
2429                  const t_pbc *pbc, const t_graph *g,
2430                  real gmx_unused lambda, real gmx_unused *dvdlambda,
2431                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2432                  int gmx_unused *global_atom_index)
2433 {
2434     int  i, d, ai, aj, ak, type, m;
2435     int  t1, t2;
2436     real v, vtot;
2437     ivec jt, dt_ij, dt_kj;
2438     rvec f_i, f_j, f_k;
2439     real prefactor, ratio_ante, ratio_post;
2440     rvec delta_ante, delta_post, vec_temp;
2441
2442     vtot = 0.0;
2443     for (i = 0; (i < nbonds); )
2444     {
2445         type = forceatoms[i++];
2446         ai   = forceatoms[i++];
2447         aj   = forceatoms[i++];
2448         ak   = forceatoms[i++];
2449
2450         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2451         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2452         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2453
2454
2455         /* This function computes factors needed for restricted angle potential.
2456          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2457          * when three particles align and the dihedral angle and dihedral potential
2458          * cannot be calculated. This potential is calculated using the formula:
2459            real restrangles(int nbonds,
2460             const t_iatom forceatoms[],const t_iparams forceparams[],
2461             const rvec x[],rvec4 f[],rvec fshift[],
2462             const t_pbc *pbc,const t_graph *g,
2463             real gmx_unused lambda,real gmx_unused *dvdlambda,
2464             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2465             int gmx_unused *global_atom_index)
2466            {
2467            int  i, d, ai, aj, ak, type, m;
2468            int t1, t2;
2469            rvec r_ij,r_kj;
2470            real v, vtot;
2471            ivec jt,dt_ij,dt_kj;
2472            rvec f_i, f_j, f_k;
2473            real prefactor, ratio_ante, ratio_post;
2474            rvec delta_ante, delta_post, vec_temp;
2475
2476            vtot = 0.0;
2477            for(i=0; (i<nbonds); )
2478            {
2479            type = forceatoms[i++];
2480            ai   = forceatoms[i++];
2481            aj   = forceatoms[i++];
2482            ak   = forceatoms[i++];
2483
2484          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2485          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2486          * For more explanations see comments file "restcbt.h". */
2487
2488         compute_factors_restangles(type, forceparams,  delta_ante, delta_post,
2489                                    &prefactor, &ratio_ante, &ratio_post, &v);
2490
2491         /*   Forces are computed per component */
2492         for (d = 0; d < DIM; d++)
2493         {
2494             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2495             f_j[d] = prefactor * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2496             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2497         }
2498
2499         /*   Computation of potential energy   */
2500
2501         vtot += v;
2502
2503         /*   Update forces */
2504
2505         for (m = 0; (m < DIM); m++)
2506         {
2507             f[ai][m] += f_i[m];
2508             f[aj][m] += f_j[m];
2509             f[ak][m] += f_k[m];
2510         }
2511
2512         if (g)
2513         {
2514             copy_ivec(SHIFT_IVEC(g, aj), jt);
2515             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2516             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2517             t1 = IVEC2IS(dt_ij);
2518             t2 = IVEC2IS(dt_kj);
2519         }
2520
2521         rvec_inc(fshift[t1], f_i);
2522         rvec_inc(fshift[CENTRAL], f_j);
2523         rvec_inc(fshift[t2], f_k);
2524     }
2525     return vtot;
2526 }
2527
2528
2529 real restrdihs(int nbonds,
2530                const t_iatom forceatoms[], const t_iparams forceparams[],
2531                const rvec x[], rvec4 f[], rvec fshift[],
2532                const t_pbc *pbc, const t_graph *g,
2533                real gmx_unused lambda, real gmx_unused *dvlambda,
2534                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2535                int gmx_unused *global_atom_index)
2536 {
2537     int  i, d, type, ai, aj, ak, al;
2538     rvec f_i, f_j, f_k, f_l;
2539     rvec dx_jl;
2540     ivec jt, dt_ij, dt_kj, dt_lj;
2541     int  t1, t2, t3;
2542     real v, vtot;
2543     rvec delta_ante,  delta_crnt, delta_post, vec_temp;
2544     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2545     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2546     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2547     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2548     real prefactor_phi;
2549
2550
2551     vtot = 0.0;
2552     for (i = 0; (i < nbonds); )
2553     {
2554         type = forceatoms[i++];
2555         ai   = forceatoms[i++];
2556         aj   = forceatoms[i++];
2557         ak   = forceatoms[i++];
2558         al   = forceatoms[i++];
2559
2560         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2561         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2562         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2563         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2564         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2565
2566         /* This function computes factors needed for restricted angle potential.
2567          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2568          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2569          * This potential is calculated using the formula:
2570          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2571          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2572          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2573          * For more explanations see comments file "restcbt.h" */
2574
2575         compute_factors_restrdihs(type, forceparams,
2576                                   delta_ante, delta_crnt, delta_post,
2577                                   &factor_phi_ai_ante, &factor_phi_ai_crnt, &factor_phi_ai_post,
2578                                   &factor_phi_aj_ante, &factor_phi_aj_crnt, &factor_phi_aj_post,
2579                                   &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2580                                   &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post,
2581                                   &prefactor_phi, &v);
2582
2583
2584         /*      Computation of forces per component */
2585         for (d = 0; d < DIM; d++)
2586         {
2587             f_i[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]);
2588             f_j[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]);
2589             f_k[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]);
2590             f_l[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]);
2591         }
2592         /*      Computation of the energy */
2593
2594         vtot += v;
2595
2596
2597
2598         /*    Updating the forces */
2599
2600         rvec_inc(f[ai], f_i);
2601         rvec_inc(f[aj], f_j);
2602         rvec_inc(f[ak], f_k);
2603         rvec_inc(f[al], f_l);
2604
2605
2606         /* Updating the fshift forces for the pressure coupling */
2607         if (g)
2608         {
2609             copy_ivec(SHIFT_IVEC(g, aj), jt);
2610             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2611             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2612             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2613             t1 = IVEC2IS(dt_ij);
2614             t2 = IVEC2IS(dt_kj);
2615             t3 = IVEC2IS(dt_lj);
2616         }
2617         else if (pbc)
2618         {
2619             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2620         }
2621         else
2622         {
2623             t3 = CENTRAL;
2624         }
2625
2626         rvec_inc(fshift[t1], f_i);
2627         rvec_inc(fshift[CENTRAL], f_j);
2628         rvec_inc(fshift[t2], f_k);
2629         rvec_inc(fshift[t3], f_l);
2630
2631     }
2632
2633     return vtot;
2634 }
2635
2636
2637 real cbtdihs(int nbonds,
2638              const t_iatom forceatoms[], const t_iparams forceparams[],
2639              const rvec x[], rvec4 f[], rvec fshift[],
2640              const t_pbc *pbc, const t_graph *g,
2641              real gmx_unused lambda, real gmx_unused *dvdlambda,
2642              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2643              int gmx_unused *global_atom_index)
2644 {
2645     int  type, ai, aj, ak, al, i, d;
2646     int  t1, t2, t3;
2647     real v, vtot;
2648     rvec vec_temp;
2649     rvec f_i, f_j, f_k, f_l;
2650     ivec jt, dt_ij, dt_kj, dt_lj;
2651     rvec dx_jl;
2652     rvec delta_ante, delta_crnt, delta_post;
2653     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2654     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2655     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2656
2657
2658
2659
2660     vtot = 0.0;
2661     for (i = 0; (i < nbonds); )
2662     {
2663         type = forceatoms[i++];
2664         ai   = forceatoms[i++];
2665         aj   = forceatoms[i++];
2666         ak   = forceatoms[i++];
2667         al   = forceatoms[i++];
2668
2669
2670         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2671         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2672         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2673         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2674         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2675         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2676
2677         /* \brief Compute factors for CBT potential
2678          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2679          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2680          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2681          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2682          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2683          * It contains in its expression not only the dihedral angle \f$\phi\f$
2684          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2685          * --- the adjacent bending angles.
2686          * For more explanations see comments file "restcbt.h". */
2687
2688         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post,
2689                                 f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al,
2690                                 f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak,
2691                                 f_theta_post_aj, f_theta_post_ak, f_theta_post_al,
2692                                 &v);
2693
2694
2695         /*      Acumulate the resuts per beads */
2696         for (d = 0; d < DIM; d++)
2697         {
2698             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2699             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2700             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2701             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2702         }
2703
2704         /*      Compute the potential energy */
2705
2706         vtot += v;
2707
2708
2709         /*  Updating the forces */
2710         rvec_inc(f[ai], f_i);
2711         rvec_inc(f[aj], f_j);
2712         rvec_inc(f[ak], f_k);
2713         rvec_inc(f[al], f_l);
2714
2715
2716         /* Updating the fshift forces for the pressure coupling */
2717         if (g)
2718         {
2719             copy_ivec(SHIFT_IVEC(g, aj), jt);
2720             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2721             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2722             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2723             t1 = IVEC2IS(dt_ij);
2724             t2 = IVEC2IS(dt_kj);
2725             t3 = IVEC2IS(dt_lj);
2726         }
2727         else if (pbc)
2728         {
2729             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2730         }
2731         else
2732         {
2733             t3 = CENTRAL;
2734         }
2735
2736         rvec_inc(fshift[t1], f_i);
2737         rvec_inc(fshift[CENTRAL], f_j);
2738         rvec_inc(fshift[t2], f_k);
2739         rvec_inc(fshift[t3], f_l);
2740     }
2741
2742     return vtot;
2743 }
2744
2745 real rbdihs(int nbonds,
2746             const t_iatom forceatoms[], const t_iparams forceparams[],
2747             const rvec x[], rvec4 f[], rvec fshift[],
2748             const t_pbc *pbc, const t_graph *g,
2749             real lambda, real *dvdlambda,
2750             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2751             int gmx_unused *global_atom_index)
2752 {
2753     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2754     int        type, ai, aj, ak, al, i, j;
2755     int        t1, t2, t3;
2756     rvec       r_ij, r_kj, r_kl, m, n;
2757     real       parmA[NR_RBDIHS];
2758     real       parmB[NR_RBDIHS];
2759     real       parm[NR_RBDIHS];
2760     real       cos_phi, phi, rbp, rbpBA;
2761     real       v, ddphi, sin_phi;
2762     real       cosfac, vtot;
2763     real       L1        = 1.0-lambda;
2764     real       dvdl_term = 0;
2765
2766     vtot = 0.0;
2767     for (i = 0; (i < nbonds); )
2768     {
2769         type = forceatoms[i++];
2770         ai   = forceatoms[i++];
2771         aj   = forceatoms[i++];
2772         ak   = forceatoms[i++];
2773         al   = forceatoms[i++];
2774
2775         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2776                         &t1, &t2, &t3);  /*  84         */
2777
2778         /* Change to polymer convention */
2779         if (phi < c0)
2780         {
2781             phi += M_PI;
2782         }
2783         else
2784         {
2785             phi -= M_PI;    /*   1              */
2786
2787         }
2788         cos_phi = std::cos(phi);
2789         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2790         sin_phi = std::sin(phi);
2791
2792         for (j = 0; (j < NR_RBDIHS); j++)
2793         {
2794             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2795             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2796             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2797         }
2798         /* Calculate cosine powers */
2799         /* Calculate the energy */
2800         /* Calculate the derivative */
2801
2802         v            = parm[0];
2803         dvdl_term   += (parmB[0]-parmA[0]);
2804         ddphi        = c0;
2805         cosfac       = c1;
2806
2807         rbp          = parm[1];
2808         rbpBA        = parmB[1]-parmA[1];
2809         ddphi       += rbp*cosfac;
2810         cosfac      *= cos_phi;
2811         v           += cosfac*rbp;
2812         dvdl_term   += cosfac*rbpBA;
2813         rbp          = parm[2];
2814         rbpBA        = parmB[2]-parmA[2];
2815         ddphi       += c2*rbp*cosfac;
2816         cosfac      *= cos_phi;
2817         v           += cosfac*rbp;
2818         dvdl_term   += cosfac*rbpBA;
2819         rbp          = parm[3];
2820         rbpBA        = parmB[3]-parmA[3];
2821         ddphi       += c3*rbp*cosfac;
2822         cosfac      *= cos_phi;
2823         v           += cosfac*rbp;
2824         dvdl_term   += cosfac*rbpBA;
2825         rbp          = parm[4];
2826         rbpBA        = parmB[4]-parmA[4];
2827         ddphi       += c4*rbp*cosfac;
2828         cosfac      *= cos_phi;
2829         v           += cosfac*rbp;
2830         dvdl_term   += cosfac*rbpBA;
2831         rbp          = parm[5];
2832         rbpBA        = parmB[5]-parmA[5];
2833         ddphi       += c5*rbp*cosfac;
2834         cosfac      *= cos_phi;
2835         v           += cosfac*rbp;
2836         dvdl_term   += cosfac*rbpBA;
2837
2838         ddphi = -ddphi*sin_phi;         /*  11          */
2839
2840         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2841                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2842         vtot += v;
2843     }
2844     *dvdlambda += dvdl_term;
2845
2846     return vtot;
2847 }
2848
2849 //! \endcond
2850
2851 /*! \brief Mysterious undocumented function */
2852 static int
2853 cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2854 {
2855     int im1, ip1, ip2;
2856
2857     if (ip < 0)
2858     {
2859         ip = ip + grid_spacing - 1;
2860     }
2861     else if (ip > grid_spacing)
2862     {
2863         ip = ip - grid_spacing - 1;
2864     }
2865
2866     im1 = ip - 1;
2867     ip1 = ip + 1;
2868     ip2 = ip + 2;
2869
2870     if (ip == 0)
2871     {
2872         im1 = grid_spacing - 1;
2873     }
2874     else if (ip == grid_spacing-2)
2875     {
2876         ip2 = 0;
2877     }
2878     else if (ip == grid_spacing-1)
2879     {
2880         ip1 = 0;
2881         ip2 = 1;
2882     }
2883
2884     *ipm1 = im1;
2885     *ipp1 = ip1;
2886     *ipp2 = ip2;
2887
2888     return ip;
2889
2890 }
2891
2892 real
2893 cmap_dihs(int nbonds,
2894           const t_iatom forceatoms[], const t_iparams forceparams[],
2895           const gmx_cmap_t *cmap_grid,
2896           const rvec x[], rvec4 f[], rvec fshift[],
2897           const struct t_pbc *pbc, const struct t_graph *g,
2898           real gmx_unused lambda, real gmx_unused *dvdlambda,
2899           const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2900           int  gmx_unused *global_atom_index)
2901 {
2902     int         i, n;
2903     int         ai, aj, ak, al, am;
2904     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2905     int         type, cmapA;
2906     int         t11, t21, t31, t12, t22, t32;
2907     int         iphi1, ip1m1, ip1p1, ip1p2;
2908     int         iphi2, ip2m1, ip2p1, ip2p2;
2909     int         l1, l2, l3;
2910     int         pos1, pos2, pos3, pos4;
2911
2912     real        ty[4], ty1[4], ty2[4], ty12[4], tx[16];
2913     real        phi1, cos_phi1, sin_phi1, xphi1;
2914     real        phi2, cos_phi2, sin_phi2, xphi2;
2915     real        dx, tt, tu, e, df1, df2, vtot;
2916     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2917     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2918     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
2919     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
2920     real        fac;
2921
2922     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
2923     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
2924     rvec        f1_i, f1_j, f1_k, f1_l;
2925     rvec        f2_i, f2_j, f2_k, f2_l;
2926     rvec        a1, b1, a2, b2;
2927     rvec        f1, g1, h1, f2, g2, h2;
2928     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2929     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
2930     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
2931
2932     const real *cmapd;
2933
2934     int         loop_index[4][4] = {
2935         {0, 4, 8, 12},
2936         {1, 5, 9, 13},
2937         {2, 6, 10, 14},
2938         {3, 7, 11, 15}
2939     };
2940
2941     /* Total CMAP energy */
2942     vtot = 0;
2943
2944     for (n = 0; n < nbonds; )
2945     {
2946         /* Five atoms are involved in the two torsions */
2947         type   = forceatoms[n++];
2948         ai     = forceatoms[n++];
2949         aj     = forceatoms[n++];
2950         ak     = forceatoms[n++];
2951         al     = forceatoms[n++];
2952         am     = forceatoms[n++];
2953
2954         /* Which CMAP type is this */
2955         cmapA = forceparams[type].cmap.cmapA;
2956         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2957
2958         /* First torsion */
2959         a1i   = ai;
2960         a1j   = aj;
2961         a1k   = ak;
2962         a1l   = al;
2963
2964         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2965                           &t11, &t21, &t31);  /* 84 */
2966
2967         cos_phi1 = std::cos(phi1);
2968
2969         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2970         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2971         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2972
2973         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2974         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2975         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2976
2977         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2978
2979         ra21  = iprod(a1, a1);       /* 5 */
2980         rb21  = iprod(b1, b1);       /* 5 */
2981         rg21  = iprod(r1_kj, r1_kj); /* 5 */
2982         rg1   = sqrt(rg21);
2983
2984         rgr1  = 1.0/rg1;
2985         ra2r1 = 1.0/ra21;
2986         rb2r1 = 1.0/rb21;
2987         rabr1 = sqrt(ra2r1*rb2r1);
2988
2989         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2990
2991         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2992         {
2993             phi1 = std::asin(sin_phi1);
2994
2995             if (cos_phi1 < 0)
2996             {
2997                 if (phi1 > 0)
2998                 {
2999                     phi1 = M_PI - phi1;
3000                 }
3001                 else
3002                 {
3003                     phi1 = -M_PI - phi1;
3004                 }
3005             }
3006         }
3007         else
3008         {
3009             phi1 = std::acos(cos_phi1);
3010
3011             if (sin_phi1 < 0)
3012             {
3013                 phi1 = -phi1;
3014             }
3015         }
3016
3017         xphi1 = phi1 + M_PI; /* 1 */
3018
3019         /* Second torsion */
3020         a2i   = aj;
3021         a2j   = ak;
3022         a2k   = al;
3023         a2l   = am;
3024
3025         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
3026                           &t12, &t22, &t32); /* 84 */
3027
3028         cos_phi2 = std::cos(phi2);
3029
3030         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
3031         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
3032         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
3033
3034         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
3035         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
3036         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
3037
3038         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3039
3040         ra22  = iprod(a2, a2);         /* 5 */
3041         rb22  = iprod(b2, b2);         /* 5 */
3042         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
3043         rg2   = sqrt(rg22);
3044
3045         rgr2  = 1.0/rg2;
3046         ra2r2 = 1.0/ra22;
3047         rb2r2 = 1.0/rb22;
3048         rabr2 = sqrt(ra2r2*rb2r2);
3049
3050         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3051
3052         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3053         {
3054             phi2 = std::asin(sin_phi2);
3055
3056             if (cos_phi2 < 0)
3057             {
3058                 if (phi2 > 0)
3059                 {
3060                     phi2 = M_PI - phi2;
3061                 }
3062                 else
3063                 {
3064                     phi2 = -M_PI - phi2;
3065                 }
3066             }
3067         }
3068         else
3069         {
3070             phi2 = std::acos(cos_phi2);
3071
3072             if (sin_phi2 < 0)
3073             {
3074                 phi2 = -phi2;
3075             }
3076         }
3077
3078         xphi2 = phi2 + M_PI; /* 1 */
3079
3080         /* Range mangling */
3081         if (xphi1 < 0)
3082         {
3083             xphi1 = xphi1 + 2*M_PI;
3084         }
3085         else if (xphi1 >= 2*M_PI)
3086         {
3087             xphi1 = xphi1 - 2*M_PI;
3088         }
3089
3090         if (xphi2 < 0)
3091         {
3092             xphi2 = xphi2 + 2*M_PI;
3093         }
3094         else if (xphi2 >= 2*M_PI)
3095         {
3096             xphi2 = xphi2 - 2*M_PI;
3097         }
3098
3099         /* Number of grid points */
3100         dx = 2*M_PI / cmap_grid->grid_spacing;
3101
3102         /* Where on the grid are we */
3103         iphi1 = static_cast<int>(xphi1/dx);
3104         iphi2 = static_cast<int>(xphi2/dx);
3105
3106         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3107         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3108
3109         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3110         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3111         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3112         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3113
3114         ty[0]   = cmapd[pos1*4];
3115         ty[1]   = cmapd[pos2*4];
3116         ty[2]   = cmapd[pos3*4];
3117         ty[3]   = cmapd[pos4*4];
3118
3119         ty1[0]   = cmapd[pos1*4+1];
3120         ty1[1]   = cmapd[pos2*4+1];
3121         ty1[2]   = cmapd[pos3*4+1];
3122         ty1[3]   = cmapd[pos4*4+1];
3123
3124         ty2[0]   = cmapd[pos1*4+2];
3125         ty2[1]   = cmapd[pos2*4+2];
3126         ty2[2]   = cmapd[pos3*4+2];
3127         ty2[3]   = cmapd[pos4*4+2];
3128
3129         ty12[0]   = cmapd[pos1*4+3];
3130         ty12[1]   = cmapd[pos2*4+3];
3131         ty12[2]   = cmapd[pos3*4+3];
3132         ty12[3]   = cmapd[pos4*4+3];
3133
3134         /* Switch to degrees */
3135         dx    = 360.0 / cmap_grid->grid_spacing;
3136         xphi1 = xphi1 * RAD2DEG;
3137         xphi2 = xphi2 * RAD2DEG;
3138
3139         for (i = 0; i < 4; i++) /* 16 */
3140         {
3141             tx[i]    = ty[i];
3142             tx[i+4]  = ty1[i]*dx;
3143             tx[i+8]  = ty2[i]*dx;
3144             tx[i+12] = ty12[i]*dx*dx;
3145         }
3146
3147         real tc[16] = {0};
3148         for (int idx = 0; idx < 16; idx++) /* 1056 */
3149         {
3150             for (int k = 0; k < 16; k++)
3151             {
3152                 tc[idx] += cmap_coeff_matrix[k*16+idx]*tx[k];
3153             }
3154         }
3155
3156         tt    = (xphi1-iphi1*dx)/dx;
3157         tu    = (xphi2-iphi2*dx)/dx;
3158
3159         e     = 0;
3160         df1   = 0;
3161         df2   = 0;
3162
3163         for (i = 3; i >= 0; i--)
3164         {
3165             l1 = loop_index[i][3];
3166             l2 = loop_index[i][2];
3167             l3 = loop_index[i][1];
3168
3169             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3170             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3171             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3172         }
3173
3174         fac     = RAD2DEG/dx;
3175         df1     = df1   * fac;
3176         df2     = df2   * fac;
3177
3178         /* CMAP energy */
3179         vtot += e;
3180
3181         /* Do forces - first torsion */
3182         fg1       = iprod(r1_ij, r1_kj);
3183         hg1       = iprod(r1_kl, r1_kj);
3184         fga1      = fg1*ra2r1*rgr1;
3185         hgb1      = hg1*rb2r1*rgr1;
3186         gaa1      = -ra2r1*rg1;
3187         gbb1      = rb2r1*rg1;
3188
3189         for (i = 0; i < DIM; i++)
3190         {
3191             dtf1[i]   = gaa1 * a1[i];
3192             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3193             dth1[i]   = gbb1 * b1[i];
3194
3195             f1[i]     = df1  * dtf1[i];
3196             g1[i]     = df1  * dtg1[i];
3197             h1[i]     = df1  * dth1[i];
3198
3199             f1_i[i]   =  f1[i];
3200             f1_j[i]   = -f1[i] - g1[i];
3201             f1_k[i]   =  h1[i] + g1[i];
3202             f1_l[i]   = -h1[i];
3203
3204             f[a1i][i] = f[a1i][i] + f1_i[i];
3205             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3206             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3207             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3208         }
3209
3210         /* Do forces - second torsion */
3211         fg2       = iprod(r2_ij, r2_kj);
3212         hg2       = iprod(r2_kl, r2_kj);
3213         fga2      = fg2*ra2r2*rgr2;
3214         hgb2      = hg2*rb2r2*rgr2;
3215         gaa2      = -ra2r2*rg2;
3216         gbb2      = rb2r2*rg2;
3217
3218         for (i = 0; i < DIM; i++)
3219         {
3220             dtf2[i]   = gaa2 * a2[i];
3221             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3222             dth2[i]   = gbb2 * b2[i];
3223
3224             f2[i]     = df2  * dtf2[i];
3225             g2[i]     = df2  * dtg2[i];
3226             h2[i]     = df2  * dth2[i];
3227
3228             f2_i[i]   =  f2[i];
3229             f2_j[i]   = -f2[i] - g2[i];
3230             f2_k[i]   =  h2[i] + g2[i];
3231             f2_l[i]   = -h2[i];
3232
3233             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3234             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3235             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3236             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3237         }
3238
3239         /* Shift forces */
3240         if (g)
3241         {
3242             copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3243             ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3244             ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3245             ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3246             t11 = IVEC2IS(dt1_ij);
3247             t21 = IVEC2IS(dt1_kj);
3248             t31 = IVEC2IS(dt1_lj);
3249
3250             copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3251             ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3252             ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3253             ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3254             t12 = IVEC2IS(dt2_ij);
3255             t22 = IVEC2IS(dt2_kj);
3256             t32 = IVEC2IS(dt2_lj);
3257         }
3258         else if (pbc)
3259         {
3260             t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3261             t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3262         }
3263         else
3264         {
3265             t31 = CENTRAL;
3266             t32 = CENTRAL;
3267         }
3268
3269         rvec_inc(fshift[t11], f1_i);
3270         rvec_inc(fshift[CENTRAL], f1_j);
3271         rvec_inc(fshift[t21], f1_k);
3272         rvec_inc(fshift[t31], f1_l);
3273
3274         rvec_inc(fshift[t21], f2_i);
3275         rvec_inc(fshift[CENTRAL], f2_j);
3276         rvec_inc(fshift[t22], f2_k);
3277         rvec_inc(fshift[t32], f2_l);
3278     }
3279     return vtot;
3280 }
3281
3282
3283 //! \cond
3284 /***********************************************************
3285  *
3286  *   G R O M O S  9 6   F U N C T I O N S
3287  *
3288  ***********************************************************/
3289 static real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3290                         real *V, real *F)
3291 {
3292     const real half = 0.5;
3293     real       L1, kk, x0, dx, dx2;
3294     real       v, f, dvdlambda;
3295
3296     L1    = 1.0-lambda;
3297     kk    = L1*kA+lambda*kB;
3298     x0    = L1*xA+lambda*xB;
3299
3300     dx    = x-x0;
3301     dx2   = dx*dx;
3302
3303     f          = -kk*dx;
3304     v          = half*kk*dx2;
3305     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3306
3307     *F    = f;
3308     *V    = v;
3309
3310     return dvdlambda;
3311
3312     /* That was 21 flops */
3313 }
3314
3315 real g96bonds(int nbonds,
3316               const t_iatom forceatoms[], const t_iparams forceparams[],
3317               const rvec x[], rvec4 f[], rvec fshift[],
3318               const t_pbc *pbc, const t_graph *g,
3319               real lambda, real *dvdlambda,
3320               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3321               int gmx_unused *global_atom_index)
3322 {
3323     int  i, m, ki, ai, aj, type;
3324     real dr2, fbond, vbond, fij, vtot;
3325     rvec dx;
3326     ivec dt;
3327
3328     vtot = 0.0;
3329     for (i = 0; (i < nbonds); )
3330     {
3331         type = forceatoms[i++];
3332         ai   = forceatoms[i++];
3333         aj   = forceatoms[i++];
3334
3335         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3336         dr2  = iprod(dx, dx);                       /*   5              */
3337
3338         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3339                                   forceparams[type].harmonic.krB,
3340                                   forceparams[type].harmonic.rA,
3341                                   forceparams[type].harmonic.rB,
3342                                   dr2, lambda, &vbond, &fbond);
3343
3344         vtot  += 0.5*vbond;                         /* 1*/
3345
3346         if (g)
3347         {
3348             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3349             ki = IVEC2IS(dt);
3350         }
3351         for (m = 0; (m < DIM); m++)     /*  15          */
3352         {
3353             fij                 = fbond*dx[m];
3354             f[ai][m]           += fij;
3355             f[aj][m]           -= fij;
3356             fshift[ki][m]      += fij;
3357             fshift[CENTRAL][m] -= fij;
3358         }
3359     }               /* 44 TOTAL */
3360     return vtot;
3361 }
3362
3363 static real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3364                           rvec r_ij, rvec r_kj,
3365                           int *t1, int *t2)
3366 /* Return value is the angle between the bonds i-j and j-k */
3367 {
3368     real costh;
3369
3370     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3371     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3372
3373     costh = cos_angle(r_ij, r_kj);         /* 25                */
3374     /* 41 TOTAL */
3375     return costh;
3376 }
3377
3378 real g96angles(int nbonds,
3379                const t_iatom forceatoms[], const t_iparams forceparams[],
3380                const rvec x[], rvec4 f[], rvec fshift[],
3381                const t_pbc *pbc, const t_graph *g,
3382                real lambda, real *dvdlambda,
3383                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3384                int gmx_unused *global_atom_index)
3385 {
3386     int  i, ai, aj, ak, type, m, t1, t2;
3387     rvec r_ij, r_kj;
3388     real cos_theta, dVdt, va, vtot;
3389     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3390     rvec f_i, f_j, f_k;
3391     ivec jt, dt_ij, dt_kj;
3392
3393     vtot = 0.0;
3394     for (i = 0; (i < nbonds); )
3395     {
3396         type = forceatoms[i++];
3397         ai   = forceatoms[i++];
3398         aj   = forceatoms[i++];
3399         ak   = forceatoms[i++];
3400
3401         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3402
3403         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3404                                   forceparams[type].harmonic.krB,
3405                                   forceparams[type].harmonic.rA,
3406                                   forceparams[type].harmonic.rB,
3407                                   cos_theta, lambda, &va, &dVdt);
3408         vtot    += va;
3409
3410         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3411         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3412         rij_2    = rij_1*rij_1;
3413         rkj_2    = rkj_1*rkj_1;
3414         rijrkj_1 = rij_1*rkj_1;         /* 23 */
3415
3416         for (m = 0; (m < DIM); m++)     /*  42  */
3417         {
3418             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3419             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3420             f_j[m]    = -f_i[m]-f_k[m];
3421             f[ai][m] += f_i[m];
3422             f[aj][m] += f_j[m];
3423             f[ak][m] += f_k[m];
3424         }
3425
3426         if (g)
3427         {
3428             copy_ivec(SHIFT_IVEC(g, aj), jt);
3429
3430             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3431             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3432             t1 = IVEC2IS(dt_ij);
3433             t2 = IVEC2IS(dt_kj);
3434         }
3435         rvec_inc(fshift[t1], f_i);
3436         rvec_inc(fshift[CENTRAL], f_j);
3437         rvec_inc(fshift[t2], f_k);          /* 9 */
3438         /* 163 TOTAL    */
3439     }
3440     return vtot;
3441 }
3442
3443 real cross_bond_bond(int nbonds,
3444                      const t_iatom forceatoms[], const t_iparams forceparams[],
3445                      const rvec x[], rvec4 f[], rvec fshift[],
3446                      const t_pbc *pbc, const t_graph *g,
3447                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3448                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3449                      int gmx_unused *global_atom_index)
3450 {
3451     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3452      * pp. 842-847
3453      */
3454     int  i, ai, aj, ak, type, m, t1, t2;
3455     rvec r_ij, r_kj;
3456     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3457     rvec f_i, f_j, f_k;
3458     ivec jt, dt_ij, dt_kj;
3459
3460     vtot = 0.0;
3461     for (i = 0; (i < nbonds); )
3462     {
3463         type = forceatoms[i++];
3464         ai   = forceatoms[i++];
3465         aj   = forceatoms[i++];
3466         ak   = forceatoms[i++];
3467         r1e  = forceparams[type].cross_bb.r1e;
3468         r2e  = forceparams[type].cross_bb.r2e;
3469         krr  = forceparams[type].cross_bb.krr;
3470
3471         /* Compute distance vectors ... */
3472         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3473         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3474
3475         /* ... and their lengths */
3476         r1 = norm(r_ij);
3477         r2 = norm(r_kj);
3478
3479         /* Deviations from ideality */
3480         s1 = r1-r1e;
3481         s2 = r2-r2e;
3482
3483         /* Energy (can be negative!) */
3484         vrr   = krr*s1*s2;
3485         vtot += vrr;
3486
3487         /* Forces */
3488         svmul(-krr*s2/r1, r_ij, f_i);
3489         svmul(-krr*s1/r2, r_kj, f_k);
3490
3491         for (m = 0; (m < DIM); m++)     /*  12  */
3492         {
3493             f_j[m]    = -f_i[m] - f_k[m];
3494             f[ai][m] += f_i[m];
3495             f[aj][m] += f_j[m];
3496             f[ak][m] += f_k[m];
3497         }
3498
3499         /* Virial stuff */
3500         if (g)
3501         {
3502             copy_ivec(SHIFT_IVEC(g, aj), jt);
3503
3504             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3505             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3506             t1 = IVEC2IS(dt_ij);
3507             t2 = IVEC2IS(dt_kj);
3508         }
3509         rvec_inc(fshift[t1], f_i);
3510         rvec_inc(fshift[CENTRAL], f_j);
3511         rvec_inc(fshift[t2], f_k);          /* 9 */
3512         /* 163 TOTAL    */
3513     }
3514     return vtot;
3515 }
3516
3517 real cross_bond_angle(int nbonds,
3518                       const t_iatom forceatoms[], const t_iparams forceparams[],
3519                       const rvec x[], rvec4 f[], rvec fshift[],
3520                       const t_pbc *pbc, const t_graph *g,
3521                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3522                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3523                       int gmx_unused *global_atom_index)
3524 {
3525     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3526      * pp. 842-847
3527      */
3528     int  i, ai, aj, ak, type, m, t1, t2;
3529     rvec r_ij, r_kj, r_ik;
3530     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3531     rvec f_i, f_j, f_k;
3532     ivec jt, dt_ij, dt_kj;
3533
3534     vtot = 0.0;
3535     for (i = 0; (i < nbonds); )
3536     {
3537         type = forceatoms[i++];
3538         ai   = forceatoms[i++];
3539         aj   = forceatoms[i++];
3540         ak   = forceatoms[i++];
3541         r1e  = forceparams[type].cross_ba.r1e;
3542         r2e  = forceparams[type].cross_ba.r2e;
3543         r3e  = forceparams[type].cross_ba.r3e;
3544         krt  = forceparams[type].cross_ba.krt;
3545
3546         /* Compute distance vectors ... */
3547         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3548         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3549         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3550
3551         /* ... and their lengths */
3552         r1 = norm(r_ij);
3553         r2 = norm(r_kj);
3554         r3 = norm(r_ik);
3555
3556         /* Deviations from ideality */
3557         s1 = r1-r1e;
3558         s2 = r2-r2e;
3559         s3 = r3-r3e;
3560
3561         /* Energy (can be negative!) */
3562         vrt   = krt*s3*(s1+s2);
3563         vtot += vrt;
3564
3565         /* Forces */
3566         k1 = -krt*(s3/r1);
3567         k2 = -krt*(s3/r2);
3568         k3 = -krt*(s1+s2)/r3;
3569         for (m = 0; (m < DIM); m++)
3570         {
3571             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3572             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3573             f_j[m] = -f_i[m] - f_k[m];
3574         }
3575
3576         for (m = 0; (m < DIM); m++)     /*  12  */
3577         {
3578             f[ai][m] += f_i[m];
3579             f[aj][m] += f_j[m];
3580             f[ak][m] += f_k[m];
3581         }
3582
3583         /* Virial stuff */
3584         if (g)
3585         {
3586             copy_ivec(SHIFT_IVEC(g, aj), jt);
3587
3588             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3589             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3590             t1 = IVEC2IS(dt_ij);
3591             t2 = IVEC2IS(dt_kj);
3592         }
3593         rvec_inc(fshift[t1], f_i);
3594         rvec_inc(fshift[CENTRAL], f_j);
3595         rvec_inc(fshift[t2], f_k);          /* 9 */
3596         /* 163 TOTAL    */
3597     }
3598     return vtot;
3599 }
3600
3601 static real bonded_tab(const char *type, int table_nr,
3602                        const bondedtable_t *table, real kA, real kB, real r,
3603                        real lambda, real *V, real *F)
3604 {
3605     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3606     int  n0, nnn;
3607     real dvdlambda;
3608
3609     k = (1.0 - lambda)*kA + lambda*kB;
3610
3611     tabscale = table->scale;
3612     VFtab    = table->data;
3613
3614     rt    = r*tabscale;
3615     n0    = static_cast<int>(rt);
3616     if (n0 >= table->n)
3617     {
3618         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3619                   type, table_nr, r, n0, n0+1, table->n);
3620     }
3621     eps   = rt - n0;
3622     eps2  = eps*eps;
3623     nnn   = 4*n0;
3624     Yt    = VFtab[nnn];
3625     Ft    = VFtab[nnn+1];
3626     Geps  = VFtab[nnn+2]*eps;
3627     Heps2 = VFtab[nnn+3]*eps2;
3628     Fp    = Ft + Geps + Heps2;
3629     VV    = Yt + Fp*eps;
3630     FF    = Fp + Geps + 2.0*Heps2;
3631
3632     *F         = -k*FF*tabscale;
3633     *V         = k*VV;
3634     dvdlambda  = (kB - kA)*VV;
3635
3636     return dvdlambda;
3637
3638     /* That was 22 flops */
3639 }
3640
3641 real tab_bonds(int nbonds,
3642                const t_iatom forceatoms[], const t_iparams forceparams[],
3643                const rvec x[], rvec4 f[], rvec fshift[],
3644                const t_pbc *pbc, const t_graph *g,
3645                real lambda, real *dvdlambda,
3646                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3647                int gmx_unused  *global_atom_index)
3648 {
3649     int  i, m, ki, ai, aj, type, table;
3650     real dr, dr2, fbond, vbond, fij, vtot;
3651     rvec dx;
3652     ivec dt;
3653
3654     vtot = 0.0;
3655     for (i = 0; (i < nbonds); )
3656     {
3657         type = forceatoms[i++];
3658         ai   = forceatoms[i++];
3659         aj   = forceatoms[i++];
3660
3661         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3662         dr2  = iprod(dx, dx);                       /*   5              */
3663         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
3664
3665         table = forceparams[type].tab.table;
3666
3667         *dvdlambda += bonded_tab("bond", table,
3668                                  &fcd->bondtab[table],
3669                                  forceparams[type].tab.kA,
3670                                  forceparams[type].tab.kB,
3671                                  dr, lambda, &vbond, &fbond); /*  22 */
3672
3673         if (dr2 == 0.0)
3674         {
3675             continue;
3676         }
3677
3678
3679         vtot  += vbond;             /* 1*/
3680         fbond *= gmx::invsqrt(dr2); /*   6              */
3681         if (g)
3682         {
3683             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3684             ki = IVEC2IS(dt);
3685         }
3686         for (m = 0; (m < DIM); m++)     /*  15          */
3687         {
3688             fij                 = fbond*dx[m];
3689             f[ai][m]           += fij;
3690             f[aj][m]           -= fij;
3691             fshift[ki][m]      += fij;
3692             fshift[CENTRAL][m] -= fij;
3693         }
3694     }               /* 62 TOTAL */
3695     return vtot;
3696 }
3697
3698 real tab_angles(int nbonds,
3699                 const t_iatom forceatoms[], const t_iparams forceparams[],
3700                 const rvec x[], rvec4 f[], rvec fshift[],
3701                 const t_pbc *pbc, const t_graph *g,
3702                 real lambda, real *dvdlambda,
3703                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3704                 int gmx_unused *global_atom_index)
3705 {
3706     int  i, ai, aj, ak, t1, t2, type, table;
3707     rvec r_ij, r_kj;
3708     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3709     ivec jt, dt_ij, dt_kj;
3710
3711     vtot = 0.0;
3712     for (i = 0; (i < nbonds); )
3713     {
3714         type = forceatoms[i++];
3715         ai   = forceatoms[i++];
3716         aj   = forceatoms[i++];
3717         ak   = forceatoms[i++];
3718
3719         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3720                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3721
3722         table = forceparams[type].tab.table;
3723
3724         *dvdlambda += bonded_tab("angle", table,
3725                                  &fcd->angletab[table],
3726                                  forceparams[type].tab.kA,
3727                                  forceparams[type].tab.kB,
3728                                  theta, lambda, &va, &dVdt); /*  22  */
3729         vtot += va;
3730
3731         cos_theta2 = gmx::square(cos_theta);            /*   1          */
3732         if (cos_theta2 < 1)
3733         {
3734             int  m;
3735             real st, sth;
3736             real cik, cii, ckk;
3737             real nrkj2, nrij2;
3738             rvec f_i, f_j, f_k;
3739
3740             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
3741             sth   = st*cos_theta;                      /*   1           */
3742             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
3743             nrij2 = iprod(r_ij, r_ij);
3744
3745             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
3746             cii = sth/nrij2;                    /*  10          */
3747             ckk = sth/nrkj2;                    /*  10          */
3748
3749             for (m = 0; (m < DIM); m++)         /*  39          */
3750             {
3751                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3752                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3753                 f_j[m]    = -f_i[m]-f_k[m];
3754                 f[ai][m] += f_i[m];
3755                 f[aj][m] += f_j[m];
3756                 f[ak][m] += f_k[m];
3757             }
3758             if (g)
3759             {
3760                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3761
3762                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3763                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3764                 t1 = IVEC2IS(dt_ij);
3765                 t2 = IVEC2IS(dt_kj);
3766             }
3767             rvec_inc(fshift[t1], f_i);
3768             rvec_inc(fshift[CENTRAL], f_j);
3769             rvec_inc(fshift[t2], f_k);
3770         }                                       /* 169 TOTAL    */
3771     }
3772     return vtot;
3773 }
3774
3775 real tab_dihs(int nbonds,
3776               const t_iatom forceatoms[], const t_iparams forceparams[],
3777               const rvec x[], rvec4 f[], rvec fshift[],
3778               const t_pbc *pbc, const t_graph *g,
3779               real lambda, real *dvdlambda,
3780               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3781               int gmx_unused *global_atom_index)
3782 {
3783     int  i, type, ai, aj, ak, al, table;
3784     int  t1, t2, t3;
3785     rvec r_ij, r_kj, r_kl, m, n;
3786     real phi, ddphi, vpd, vtot;
3787
3788     vtot = 0.0;
3789     for (i = 0; (i < nbonds); )
3790     {
3791         type = forceatoms[i++];
3792         ai   = forceatoms[i++];
3793         aj   = forceatoms[i++];
3794         ak   = forceatoms[i++];
3795         al   = forceatoms[i++];
3796
3797         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3798                         &t1, &t2, &t3);  /*  84  */
3799
3800         table = forceparams[type].tab.table;
3801
3802         /* Hopefully phi+M_PI never results in values < 0 */
3803         *dvdlambda += bonded_tab("dihedral", table,
3804                                  &fcd->dihtab[table],
3805                                  forceparams[type].tab.kA,
3806                                  forceparams[type].tab.kB,
3807                                  phi+M_PI, lambda, &vpd, &ddphi);
3808
3809         vtot += vpd;
3810         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3811                    f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3812
3813     }                                                 /* 227 TOTAL  */
3814
3815     return vtot;
3816 }
3817
3818 //! \endcond