Improved variable names in FEP nonbonded kernel
[gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
blob5c62efc57d4cd660dc1293eab9e59117579c209e
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gmxpre.h"
40 #include "nb_free_energy.h"
42 #include "config.h"
44 #include <cmath>
46 #include <algorithm>
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
62 //! Scalar (non-SIMD) data types.
63 struct ScalarDataTypes
65 using RealType = real; //!< The data type to use as real.
66 using IntType = int; //!< The data type to use as int.
67 static constexpr int simdRealWidth = 1; //!< The width of the RealType.
68 static constexpr int simdIntWidth = 1; //!< The width of the IntType.
71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
72 //! SIMD data types.
73 struct SimdDataTypes
75 using RealType = gmx::SimdReal; //!< The data type to use as real.
76 using IntType = gmx::SimdInt32; //!< The data type to use as int.
77 static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH; //!< The width of the RealType.
78 static constexpr int simdIntWidth = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 #endif
82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
83 template<class RealType>
84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 *invPthRoot = gmx::invsqrt(std::cbrt(r));
87 *pthRoot = 1 / (*invPthRoot);
90 template<class RealType>
91 static inline RealType calculateRinv6(const RealType rInvV)
93 RealType rInv6 = rInvV * rInvV;
94 return (rInv6 * rInv6 * rInv6);
97 template<class RealType>
98 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 return (c6 * rInv6);
103 template<class RealType>
104 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 return (c12 * rInv6 * rInv6);
109 /* reaction-field electrostatics */
110 template<class RealType>
111 static inline RealType reactionFieldScalarForce(const RealType qq,
112 const RealType rInv,
113 const RealType r,
114 const real krf,
115 const real two)
117 return (qq * (rInv - two * krf * r * r));
119 template<class RealType>
120 static inline RealType reactionFieldPotential(const RealType qq,
121 const RealType rInv,
122 const RealType r,
123 const real krf,
124 const real potentialShift)
126 return (qq * (rInv + krf * r * r - potentialShift));
129 /* Ewald electrostatics */
130 template<class RealType>
131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 return (coulomb * rInv);
135 template<class RealType>
136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 return (coulomb * (rInv - potentialShift));
141 /* cutoff LJ */
142 template<class RealType>
143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 return (v12 - v6);
147 template<class RealType>
148 static inline RealType lennardJonesPotential(const RealType v6,
149 const RealType v12,
150 const RealType c6,
151 const RealType c12,
152 const real repulsionShift,
153 const real dispersionShift,
154 const real oneSixth,
155 const real oneTwelfth)
157 return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
160 /* Ewald LJ */
161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 return (c6grid * potentialShift * oneSixth);
166 /* LJ Potential switch */
167 template<class RealType>
168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
169 const RealType potential,
170 const RealType sw,
171 const RealType r,
172 const RealType rVdw,
173 const RealType dsw,
174 const real zero)
176 if (r < rVdw)
178 real fScalar = fScalarInp * sw - r * potential * dsw;
179 return (fScalar);
181 return (zero);
183 template<class RealType>
184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
185 const RealType sw,
186 const RealType r,
187 const RealType rVdw,
188 const real zero)
190 if (r < rVdw)
192 real potential = potentialInp * sw;
193 return (potential);
195 return (zero);
199 //! Templated free-energy non-bonded kernel
200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
202 rvec* gmx_restrict xx,
203 gmx::ForceWithShiftForces* forceWithShiftForces,
204 const t_forcerec* gmx_restrict fr,
205 const t_mdatoms* gmx_restrict mdatoms,
206 nb_kernel_data_t* gmx_restrict kernel_data,
207 t_nrnb* gmx_restrict nrnb)
209 #define STATE_A 0
210 #define STATE_B 1
211 #define NSTATES 2
213 using RealType = typename DataTypes::RealType;
214 using IntType = typename DataTypes::IntType;
216 /* FIXME: How should these be handled with SIMD? */
217 constexpr real oneTwelfth = 1.0 / 12.0;
218 constexpr real oneSixth = 1.0 / 6.0;
219 constexpr real zero = 0.0;
220 constexpr real half = 0.5;
221 constexpr real one = 1.0;
222 constexpr real two = 2.0;
223 constexpr real six = 6.0;
225 /* Extract pointer to non-bonded interaction constants */
226 const interaction_const_t* ic = fr->ic;
228 // Extract pair list data
229 const int nri = nlist->nri;
230 const int* iinr = nlist->iinr;
231 const int* jindex = nlist->jindex;
232 const int* jjnr = nlist->jjnr;
233 const int* shift = nlist->shift;
234 const int* gid = nlist->gid;
236 const real* shiftvec = fr->shift_vec[0];
237 const real* chargeA = mdatoms->chargeA;
238 const real* chargeB = mdatoms->chargeB;
239 real* Vc = kernel_data->energygrp_elec;
240 const int* typeA = mdatoms->typeA;
241 const int* typeB = mdatoms->typeB;
242 const int ntype = fr->ntype;
243 const real* nbfp = fr->nbfp.data();
244 const real* nbfp_grid = fr->ljpme_c6grid;
245 real* Vv = kernel_data->energygrp_vdw;
246 const real lambda_coul = kernel_data->lambda[efptCOUL];
247 const real lambda_vdw = kernel_data->lambda[efptVDW];
248 real* dvdl = kernel_data->dvdl;
249 const auto& scParams = *ic->softCoreParameters;
250 const real alpha_coul = scParams.alphaCoulomb;
251 const real alpha_vdw = scParams.alphaVdw;
252 const real lam_power = scParams.lambdaPower;
253 const real sigma6_def = scParams.sigma6WithInvalidSigma;
254 const real sigma6_min = scParams.sigma6Minimum;
255 const bool doForces = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
256 const bool doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
257 const bool doPotential = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
259 // Extract data from interaction_const_t
260 const real facel = ic->epsfac;
261 const real rCoulomb = ic->rcoulomb;
262 const real krf = ic->k_rf;
263 const real crf = ic->c_rf;
264 const real shLjEwald = ic->sh_lj_ewald;
265 const real rVdw = ic->rvdw;
266 const real dispersionShift = ic->dispersion_shift.cpot;
267 const real repulsionShift = ic->repulsion_shift.cpot;
269 // Note that the nbnxm kernels do not support Coulomb potential switching at all
270 GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
271 "Potential switching is not supported for Coulomb with FEP");
273 real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
274 if (vdwModifierIsPotSwitch)
276 const real d = ic->rvdw - ic->rvdw_switch;
277 vdw_swV3 = -10.0 / (d * d * d);
278 vdw_swV4 = 15.0 / (d * d * d * d);
279 vdw_swV5 = -6.0 / (d * d * d * d * d);
280 vdw_swF2 = -30.0 / (d * d * d);
281 vdw_swF3 = 60.0 / (d * d * d * d);
282 vdw_swF4 = -30.0 / (d * d * d * d * d);
284 else
286 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
287 vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
290 int icoul;
291 if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
293 icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
295 else
297 icoul = GMX_NBKERNEL_ELEC_NONE;
300 real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
301 rcutoff_max2 = rcutoff_max2 * rcutoff_max2;
303 const real* tab_ewald_F_lj = nullptr;
304 const real* tab_ewald_V_lj = nullptr;
305 const real* ewtab = nullptr;
306 real coulombTableScale = 0;
307 real coulombTableScaleInvHalf = 0;
308 real vdwTableScale = 0;
309 real vdwTableScaleInvHalf = 0;
310 real sh_ewald = 0;
311 if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
313 sh_ewald = ic->sh_ewald;
315 if (elecInteractionTypeIsEwald)
317 const auto& coulombTables = *ic->coulombEwaldTables;
318 ewtab = coulombTables.tableFDV0.data();
319 coulombTableScale = coulombTables.scale;
320 coulombTableScaleInvHalf = half / coulombTableScale;
322 if (vdwInteractionTypeIsEwald)
324 const auto& vdwTables = *ic->vdwEwaldTables;
325 tab_ewald_F_lj = vdwTables.tableF.data();
326 tab_ewald_V_lj = vdwTables.tableV.data();
327 vdwTableScale = vdwTables.scale;
328 vdwTableScaleInvHalf = half / vdwTableScale;
331 /* For Ewald/PME interactions we cannot easily apply the soft-core component to
332 * reciprocal space. When we use non-switched Ewald interactions, we
333 * assume the soft-coring does not significantly affect the grid contribution
334 * and apply the soft-core only to the full 1/r (- shift) pair contribution.
336 * However, we cannot use this approach for switch-modified since we would then
337 * effectively end up evaluating a significantly different interaction here compared to the
338 * normal (non-free-energy) kernels, either by applying a cutoff at a different
339 * position than what the user requested, or by switching different
340 * things (1/r rather than short-range Ewald). For these settings, we just
341 * use the traditional short-range Ewald interaction in that case.
343 GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
344 "Can not apply soft-core to switched Ewald potentials");
346 real dvdlCoul = 0;
347 real dvdlVdw = 0;
349 /* Lambda factor for state A, 1-lambda*/
350 real LFC[NSTATES], LFV[NSTATES];
351 LFC[STATE_A] = one - lambda_coul;
352 LFV[STATE_A] = one - lambda_vdw;
354 /* Lambda factor for state B, lambda*/
355 LFC[STATE_B] = lambda_coul;
356 LFV[STATE_B] = lambda_vdw;
358 /*derivative of the lambda factor for state A and B */
359 real DLF[NSTATES];
360 DLF[STATE_A] = -1;
361 DLF[STATE_B] = 1;
363 real lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
364 constexpr real sc_r_power = 6.0_real;
365 for (int i = 0; i < NSTATES; i++)
367 lFacCoul[i] = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
368 dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
369 lFacVdw[i] = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
370 dlFacVdw[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
373 // TODO: We should get rid of using pointers to real
374 const real* x = xx[0];
375 real* gmx_restrict f = &(forceWithShiftForces->force()[0][0]);
376 real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
378 for (int n = 0; n < nri; n++)
380 int npair_within_cutoff = 0;
382 const int is3 = 3 * shift[n];
383 const real shX = shiftvec[is3];
384 const real shY = shiftvec[is3 + 1];
385 const real shZ = shiftvec[is3 + 2];
386 const int nj0 = jindex[n];
387 const int nj1 = jindex[n + 1];
388 const int ii = iinr[n];
389 const int ii3 = 3 * ii;
390 const real ix = shX + x[ii3 + 0];
391 const real iy = shY + x[ii3 + 1];
392 const real iz = shZ + x[ii3 + 2];
393 const real iqA = facel * chargeA[ii];
394 const real iqB = facel * chargeB[ii];
395 const int ntiA = 2 * ntype * typeA[ii];
396 const int ntiB = 2 * ntype * typeB[ii];
397 real vCTot = 0;
398 real vVTot = 0;
399 real fIX = 0;
400 real fIY = 0;
401 real fIZ = 0;
403 for (int k = nj0; k < nj1; k++)
405 int tj[NSTATES];
406 const int jnr = jjnr[k];
407 const int j3 = 3 * jnr;
408 RealType c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
409 RealType r, rInv, rp, rpm2;
410 RealType alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
411 const RealType dX = ix - x[j3];
412 const RealType dY = iy - x[j3 + 1];
413 const RealType dZ = iz - x[j3 + 2];
414 const RealType rSq = dX * dX + dY * dY + dZ * dZ;
415 RealType fScalC[NSTATES], fScalV[NSTATES];
416 /* Check if this pair on the exlusions list.*/
417 const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
419 if (rSq >= rcutoff_max2 && bPairIncluded)
421 /* We save significant time by skipping all code below.
422 * Note that with soft-core interactions, the actual cut-off
423 * check might be different. But since the soft-core distance
424 * is always larger than r, checking on r here is safe.
425 * Exclusions outside the cutoff can not be skipped as
426 * when using Ewald: the reciprocal-space
427 * Ewald component still needs to be subtracted.
430 continue;
432 npair_within_cutoff++;
434 if (rSq > 0)
436 /* Note that unlike in the nbnxn kernels, we do not need
437 * to clamp the value of rSq before taking the invsqrt
438 * to avoid NaN in the LJ calculation, since here we do
439 * not calculate LJ interactions when C6 and C12 are zero.
442 rInv = gmx::invsqrt(rSq);
443 r = rSq * rInv;
445 else
447 /* The force at r=0 is zero, because of symmetry.
448 * But note that the potential is in general non-zero,
449 * since the soft-cored r will be non-zero.
451 rInv = 0;
452 r = 0;
455 if (useSoftCore)
457 rpm2 = rSq * rSq; /* r4 */
458 rp = rpm2 * rSq; /* r6 */
460 else
462 /* The soft-core power p will not affect the results
463 * with not using soft-core, so we use power of 0 which gives
464 * the simplest math and cheapest code.
466 rpm2 = rInv * rInv;
467 rp = 1;
470 RealType fScal = 0;
472 qq[STATE_A] = iqA * chargeA[jnr];
473 qq[STATE_B] = iqB * chargeB[jnr];
475 tj[STATE_A] = ntiA + 2 * typeA[jnr];
476 tj[STATE_B] = ntiB + 2 * typeB[jnr];
478 if (bPairIncluded)
480 c6[STATE_A] = nbfp[tj[STATE_A]];
481 c6[STATE_B] = nbfp[tj[STATE_B]];
483 for (int i = 0; i < NSTATES; i++)
485 c12[i] = nbfp[tj[i] + 1];
486 if (useSoftCore)
488 if ((c6[i] > 0) && (c12[i] > 0))
490 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
491 sigma6[i] = half * c12[i] / c6[i];
492 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
494 sigma6[i] = sigma6_min;
497 else
499 sigma6[i] = sigma6_def;
504 if (useSoftCore)
506 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
507 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
509 alphaVdwEff = 0;
510 alphaCoulEff = 0;
512 else
514 alphaVdwEff = alpha_vdw;
515 alphaCoulEff = alpha_coul;
519 for (int i = 0; i < NSTATES; i++)
521 fScalC[i] = 0;
522 fScalV[i] = 0;
523 vCoul[i] = 0;
524 vVdw[i] = 0;
526 RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
528 /* Only spend time on A or B state if it is non-zero */
529 if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
531 /* this section has to be inside the loop because of the dependence on sigma6 */
532 if (useSoftCore)
534 rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
535 pthRoot(rPInvC, &rInvC, &rC);
536 if (scLambdasOrAlphasDiffer)
538 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
539 pthRoot(rPInvV, &rInvV, &rV);
541 else
543 /* We can avoid one expensive pow and one / operation */
544 rPInvV = rPInvC;
545 rInvV = rInvC;
546 rV = rC;
549 else
551 rPInvC = 1;
552 rInvC = rInv;
553 rC = r;
555 rPInvV = 1;
556 rInvV = rInv;
557 rV = r;
560 /* Only process the coulomb interactions if we have charges,
561 * and if we either include all entries in the list (no cutoff
562 * used in the kernel), or if we are within the cutoff.
564 bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
565 || (!elecInteractionTypeIsEwald && rC < rCoulomb);
567 if ((qq[i] != 0) && computeElecInteraction)
569 if (elecInteractionTypeIsEwald)
571 vCoul[i] = ewaldPotential(qq[i], rInvC, sh_ewald);
572 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
574 else
576 vCoul[i] = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
577 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
581 /* Only process the VDW interactions if we have
582 * some non-zero parameters, and if we either
583 * include all entries in the list (no cutoff used
584 * in the kernel), or if we are within the cutoff.
586 bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
587 || (!vdwInteractionTypeIsEwald && rV < rVdw);
588 if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
590 RealType rInv6;
591 if (useSoftCore)
593 rInv6 = rPInvV;
595 else
597 rInv6 = calculateRinv6(rInvV);
599 RealType vVdw6 = calculateVdw6(c6[i], rInv6);
600 RealType vVdw12 = calculateVdw12(c12[i], rInv6);
602 vVdw[i] = lennardJonesPotential(
603 vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
604 fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
606 if (vdwInteractionTypeIsEwald)
608 /* Subtract the grid potential at the cut-off */
609 vVdw[i] += ewaldLennardJonesGridSubtract(
610 nbfp_grid[tj[i]], shLjEwald, oneSixth);
613 if (vdwModifierIsPotSwitch)
615 RealType d = rV - ic->rvdw_switch;
616 d = (d > zero) ? d : zero;
617 const RealType d2 = d * d;
618 const RealType sw =
619 one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
620 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
622 fScalV[i] = potSwitchScalarForceMod(
623 fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
624 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
628 /* fScalC (and fScalV) now contain: dV/drC * rC
629 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
630 * Further down we first multiply by r^p-2 and then by
631 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
633 fScalC[i] *= rPInvC;
634 fScalV[i] *= rPInvV;
636 } // end for (int i = 0; i < NSTATES; i++)
638 /* Assemble A and B states */
639 for (int i = 0; i < NSTATES; i++)
641 vCTot += LFC[i] * vCoul[i];
642 vVTot += LFV[i] * vVdw[i];
644 fScal += LFC[i] * fScalC[i] * rpm2;
645 fScal += LFV[i] * fScalV[i] * rpm2;
647 if (useSoftCore)
649 dvdlCoul += vCoul[i] * DLF[i]
650 + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
651 dvdlVdw += vVdw[i] * DLF[i]
652 + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
654 else
656 dvdlCoul += vCoul[i] * DLF[i];
657 dvdlVdw += vVdw[i] * DLF[i];
660 } // end if (bPairIncluded)
661 else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
663 /* For excluded pairs, which are only in this pair list when
664 * using the Verlet scheme, we don't use soft-core.
665 * As there is no singularity, there is no need for soft-core.
667 const real FF = -two * krf;
668 RealType VV = krf * rSq - crf;
670 if (ii == jnr)
672 VV *= half;
675 for (int i = 0; i < NSTATES; i++)
677 vCTot += LFC[i] * qq[i] * VV;
678 fScal += LFC[i] * qq[i] * FF;
679 dvdlCoul += DLF[i] * qq[i] * VV;
683 if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
685 /* See comment in the preamble. When using Ewald interactions
686 * (unless we use a switch modifier) we subtract the reciprocal-space
687 * Ewald component here which made it possible to apply the free
688 * energy interaction to 1/r (vanilla coulomb short-range part)
689 * above. This gets us closer to the ideal case of applying
690 * the softcore to the entire electrostatic interaction,
691 * including the reciprocal-space component.
693 real v_lr, f_lr;
695 const RealType ewrt = r * coulombTableScale;
696 IntType ewitab = static_cast<IntType>(ewrt);
697 const RealType eweps = ewrt - ewitab;
698 ewitab = 4 * ewitab;
699 f_lr = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
700 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
701 f_lr *= rInv;
703 /* Note that any possible Ewald shift has already been applied in
704 * the normal interaction part above.
707 if (ii == jnr)
709 /* If we get here, the i particle (ii) has itself (jnr)
710 * in its neighborlist. This can only happen with the Verlet
711 * scheme, and corresponds to a self-interaction that will
712 * occur twice. Scale it down by 50% to only include it once.
714 v_lr *= half;
717 for (int i = 0; i < NSTATES; i++)
719 vCTot -= LFC[i] * qq[i] * v_lr;
720 fScal -= LFC[i] * qq[i] * f_lr;
721 dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
725 if (vdwInteractionTypeIsEwald && r < rVdw)
727 /* See comment in the preamble. When using LJ-Ewald interactions
728 * (unless we use a switch modifier) we subtract the reciprocal-space
729 * Ewald component here which made it possible to apply the free
730 * energy interaction to r^-6 (vanilla LJ6 short-range part)
731 * above. This gets us closer to the ideal case of applying
732 * the softcore to the entire VdW interaction,
733 * including the reciprocal-space component.
735 /* We could also use the analytical form here
736 * iso a table, but that can cause issues for
737 * r close to 0 for non-interacting pairs.
740 const RealType rs = rSq * rInv * vdwTableScale;
741 const IntType ri = static_cast<IntType>(rs);
742 const RealType frac = rs - ri;
743 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
744 /* TODO: Currently the Ewald LJ table does not contain
745 * the factor 1/6, we should add this.
747 const RealType FF = f_lr * rInv / six;
748 RealType VV =
749 (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
750 / six;
752 if (ii == jnr)
754 /* If we get here, the i particle (ii) has itself (jnr)
755 * in its neighborlist. This can only happen with the Verlet
756 * scheme, and corresponds to a self-interaction that will
757 * occur twice. Scale it down by 50% to only include it once.
759 VV *= half;
762 for (int i = 0; i < NSTATES; i++)
764 const real c6grid = nbfp_grid[tj[i]];
765 vVTot += LFV[i] * c6grid * VV;
766 fScal += LFV[i] * c6grid * FF;
767 dvdlVdw += (DLF[i] * c6grid) * VV;
771 if (doForces)
773 const real tX = fScal * dX;
774 const real tY = fScal * dY;
775 const real tZ = fScal * dZ;
776 fIX = fIX + tX;
777 fIY = fIY + tY;
778 fIZ = fIZ + tZ;
779 /* OpenMP atomics are expensive, but this kernels is also
780 * expensive, so we can take this hit, instead of using
781 * thread-local output buffers and extra reduction.
783 * All the OpenMP regions in this file are trivial and should
784 * not throw, so no need for try/catch.
786 #pragma omp atomic
787 f[j3] -= tX;
788 #pragma omp atomic
789 f[j3 + 1] -= tY;
790 #pragma omp atomic
791 f[j3 + 2] -= tZ;
793 } // end for (int k = nj0; k < nj1; k++)
795 /* The atomics below are expensive with many OpenMP threads.
796 * Here unperturbed i-particles will usually only have a few
797 * (perturbed) j-particles in the list. Thus with a buffered list
798 * we can skip a significant number of i-reductions with a check.
800 if (npair_within_cutoff > 0)
802 if (doForces)
804 #pragma omp atomic
805 f[ii3] += fIX;
806 #pragma omp atomic
807 f[ii3 + 1] += fIY;
808 #pragma omp atomic
809 f[ii3 + 2] += fIZ;
811 if (doShiftForces)
813 #pragma omp atomic
814 fshift[is3] += fIX;
815 #pragma omp atomic
816 fshift[is3 + 1] += fIY;
817 #pragma omp atomic
818 fshift[is3 + 2] += fIZ;
820 if (doPotential)
822 int ggid = gid[n];
823 #pragma omp atomic
824 Vc[ggid] += vCTot;
825 #pragma omp atomic
826 Vv[ggid] += vVTot;
829 } // end for (int n = 0; n < nri; n++)
831 #pragma omp atomic
832 dvdl[efptCOUL] += dvdlCoul;
833 #pragma omp atomic
834 dvdl[efptVDW] += dvdlVdw;
836 /* Estimate flops, average for free energy stuff:
837 * 12 flops per outer iteration
838 * 150 flops per inner iteration
840 #pragma omp atomic
841 inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
844 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
845 rvec* gmx_restrict xx,
846 gmx::ForceWithShiftForces* forceWithShiftForces,
847 const t_forcerec* gmx_restrict fr,
848 const t_mdatoms* gmx_restrict mdatoms,
849 nb_kernel_data_t* gmx_restrict kernel_data,
850 t_nrnb* gmx_restrict nrnb);
852 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
853 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
855 if (useSimd)
857 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
858 /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
859 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
860 #else
861 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
862 #endif
864 else
866 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
870 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
871 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
873 if (vdwModifierIsPotSwitch)
875 return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
876 useSimd));
878 else
880 return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
881 useSimd));
885 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
886 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
887 const bool vdwModifierIsPotSwitch,
888 const bool useSimd)
890 if (elecInteractionTypeIsEwald)
892 return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
893 vdwModifierIsPotSwitch, useSimd));
895 else
897 return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
898 vdwModifierIsPotSwitch, useSimd));
902 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
903 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
904 const bool elecInteractionTypeIsEwald,
905 const bool vdwModifierIsPotSwitch,
906 const bool useSimd)
908 if (vdwInteractionTypeIsEwald)
910 return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
911 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
913 else
915 return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
916 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
920 template<bool useSoftCore>
921 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
922 const bool vdwInteractionTypeIsEwald,
923 const bool elecInteractionTypeIsEwald,
924 const bool vdwModifierIsPotSwitch,
925 const bool useSimd)
927 if (scLambdasOrAlphasDiffer)
929 return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
930 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
932 else
934 return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
935 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
939 static KernelFunction dispatchKernel(const bool scLambdasOrAlphasDiffer,
940 const bool vdwInteractionTypeIsEwald,
941 const bool elecInteractionTypeIsEwald,
942 const bool vdwModifierIsPotSwitch,
943 const bool useSimd,
944 const interaction_const_t& ic)
946 if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
948 return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
949 vdwInteractionTypeIsEwald,
950 elecInteractionTypeIsEwald,
951 vdwModifierIsPotSwitch,
952 useSimd));
954 else
956 return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
957 vdwInteractionTypeIsEwald,
958 elecInteractionTypeIsEwald,
959 vdwModifierIsPotSwitch,
960 useSimd));
965 void gmx_nb_free_energy_kernel(const t_nblist* nlist,
966 rvec* xx,
967 gmx::ForceWithShiftForces* ff,
968 const t_forcerec* fr,
969 const t_mdatoms* mdatoms,
970 nb_kernel_data_t* kernel_data,
971 t_nrnb* nrnb)
973 const interaction_const_t& ic = *fr->ic;
974 GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == eelCUT || EEL_RF(ic.eeltype),
975 "Unsupported eeltype with free energy");
976 GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
978 const auto& scParams = *ic.softCoreParameters;
979 const bool vdwInteractionTypeIsEwald = (EVDW_PME(ic.vdwtype));
980 const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
981 const bool vdwModifierIsPotSwitch = (ic.vdw_modifier == eintmodPOTSWITCH);
982 bool scLambdasOrAlphasDiffer = true;
983 const bool useSimd = fr->use_simd_kernels;
985 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
987 scLambdasOrAlphasDiffer = false;
989 else
991 if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW]
992 && scParams.alphaCoulomb == scParams.alphaVdw)
994 scLambdasOrAlphasDiffer = false;
998 KernelFunction kernelFunc;
999 kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1000 vdwInteractionTypeIsEwald,
1001 elecInteractionTypeIsEwald,
1002 vdwModifierIsPotSwitch,
1003 useSimd,
1004 ic);
1005 kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);