prepareGpuKernelArguments() and launchGpuKernel() are added
[gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
bloba3c495c885ce4d993188cc24f0f14e5704693840
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "nb_free_energy.h"
41 #include <cmath>
43 #include <algorithm>
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/utility/fatalerror.h"
54 void
55 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
56 rvec * gmx_restrict xx,
57 rvec * gmx_restrict ff,
58 t_forcerec * gmx_restrict fr,
59 const t_mdatoms * gmx_restrict mdatoms,
60 nb_kernel_data_t * gmx_restrict kernel_data,
61 t_nrnb * gmx_restrict nrnb)
64 #define STATE_A 0
65 #define STATE_B 1
66 #define NSTATES 2
67 int i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
68 real shX, shY, shZ;
69 real tx, ty, tz, Fscal;
70 double FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
71 double Vcoul[NSTATES], Vvdw[NSTATES]; /* Needs double for sc_power==48 */
72 real rinv6, r, rtC, rtV;
73 real iqA, iqB;
74 real qq[NSTATES], vctot;
75 int ntiA, ntiB, tj[NSTATES];
76 real Vvdw6, Vvdw12, vvtot;
77 real ix, iy, iz, fix, fiy, fiz;
78 real dx, dy, dz, rsq, rinv;
79 real c6[NSTATES], c12[NSTATES], c6grid;
80 real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
81 double dvdl_coul, dvdl_vdw;
82 real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
83 real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
84 double rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
85 real sigma2[NSTATES], sigma_pow[NSTATES];
86 int do_tab, tab_elemsize = 0;
87 int n0, n1C, n1V, nnn;
88 real Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
89 int icoul, ivdw;
90 int nri;
91 const int * iinr;
92 const int * jindex;
93 const int * jjnr;
94 const int * shift;
95 const int * gid;
96 const int * typeA;
97 const int * typeB;
98 int ntype;
99 const real * shiftvec;
100 real * fshift;
101 real tabscale = 0;
102 const real * VFtab = nullptr;
103 const real * x;
104 real * f;
105 real facel, krf, crf;
106 const real * chargeA;
107 const real * chargeB;
108 real sigma6_min, sigma6_def, lam_power, sc_r_power;
109 real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
110 real ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
111 real ewclj6;
112 const real * nbfp, *nbfp_grid;
113 real * dvdl;
114 real * Vv;
115 real * Vc;
116 gmx_bool bDoForces, bDoShiftForces, bDoPotential;
117 real rcoulomb, rvdw, sh_invrc6;
118 gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
119 gmx_bool bEwald, bEwaldLJ;
120 real rcutoff_max2;
121 const real * tab_ewald_F_lj = nullptr;
122 const real * tab_ewald_V_lj = nullptr;
123 real d, d2, sw, dsw, rinvcorr;
124 real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
125 real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
126 gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
127 gmx_bool bComputeVdwInteraction, bComputeElecInteraction;
128 const real * ewtab = nullptr;
129 int ewitab;
130 real ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
132 const real onetwelfth = 1.0/12.0;
133 const real onesixth = 1.0/6.0;
134 const real zero = 0.0;
135 const real half = 0.5;
136 const real one = 1.0;
137 const real two = 2.0;
138 const real six = 6.0;
139 const real fourtyeight = 48.0;
141 /* Extract pointer to non-bonded interaction constants */
142 const interaction_const_t *ic = fr->ic;
144 x = xx[0];
145 f = ff[0];
147 fshift = fr->fshift[0];
149 nri = nlist->nri;
150 iinr = nlist->iinr;
151 jindex = nlist->jindex;
152 jjnr = nlist->jjnr;
153 icoul = nlist->ielec;
154 ivdw = nlist->ivdw;
155 shift = nlist->shift;
156 gid = nlist->gid;
158 shiftvec = fr->shift_vec[0];
159 chargeA = mdatoms->chargeA;
160 chargeB = mdatoms->chargeB;
161 facel = fr->ic->epsfac;
162 krf = ic->k_rf;
163 crf = ic->c_rf;
164 Vc = kernel_data->energygrp_elec;
165 typeA = mdatoms->typeA;
166 typeB = mdatoms->typeB;
167 ntype = fr->ntype;
168 nbfp = fr->nbfp;
169 nbfp_grid = fr->ljpme_c6grid;
170 Vv = kernel_data->energygrp_vdw;
171 lambda_coul = kernel_data->lambda[efptCOUL];
172 lambda_vdw = kernel_data->lambda[efptVDW];
173 dvdl = kernel_data->dvdl;
174 alpha_coul = fr->sc_alphacoul;
175 alpha_vdw = fr->sc_alphavdw;
176 lam_power = fr->sc_power;
177 sc_r_power = fr->sc_r_power;
178 sigma6_def = fr->sc_sigma6_def;
179 sigma6_min = fr->sc_sigma6_min;
180 bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
181 bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
182 bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
184 rcoulomb = ic->rcoulomb;
185 rvdw = ic->rvdw;
186 sh_invrc6 = ic->sh_invrc6;
187 sh_lj_ewald = ic->sh_lj_ewald;
188 ewclj = ic->ewaldcoeff_lj;
189 ewclj2 = ewclj*ewclj;
190 ewclj6 = ewclj2*ewclj2*ewclj2;
192 if (ic->coulomb_modifier == eintmodPOTSWITCH)
194 d = ic->rcoulomb - ic->rcoulomb_switch;
195 elec_swV3 = -10.0/(d*d*d);
196 elec_swV4 = 15.0/(d*d*d*d);
197 elec_swV5 = -6.0/(d*d*d*d*d);
198 elec_swF2 = -30.0/(d*d*d);
199 elec_swF3 = 60.0/(d*d*d*d);
200 elec_swF4 = -30.0/(d*d*d*d*d);
202 else
204 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
205 elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
208 if (ic->vdw_modifier == eintmodPOTSWITCH)
210 d = ic->rvdw - ic->rvdw_switch;
211 vdw_swV3 = -10.0/(d*d*d);
212 vdw_swV4 = 15.0/(d*d*d*d);
213 vdw_swV5 = -6.0/(d*d*d*d*d);
214 vdw_swF2 = -30.0/(d*d*d);
215 vdw_swF3 = 60.0/(d*d*d*d);
216 vdw_swF4 = -30.0/(d*d*d*d*d);
218 else
220 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
221 vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
224 if (fr->cutoff_scheme == ecutsVERLET)
226 const interaction_const_t *ic = fr->ic;
228 if (EVDW_PME(ic->vdwtype))
230 ivdw = GMX_NBKERNEL_VDW_LJEWALD;
232 else
234 ivdw = GMX_NBKERNEL_VDW_LENNARDJONES;
237 if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
239 icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
241 else if (EEL_PME_EWALD(ic->eeltype))
243 icoul = GMX_NBKERNEL_ELEC_EWALD;
245 else
247 gmx_incons("Unsupported eeltype with Verlet and free-energy");
250 bExactElecCutoff = TRUE;
251 bExactVdwCutoff = TRUE;
253 else
255 bExactElecCutoff = (ic->coulomb_modifier != eintmodNONE) || ic->eeltype == eelRF_ZERO;
256 bExactVdwCutoff = (ic->vdw_modifier != eintmodNONE);
259 bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
260 rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
261 rcutoff_max2 = rcutoff_max2*rcutoff_max2;
263 bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD);
264 bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
266 if (bEwald || bEwaldLJ)
268 sh_ewald = ic->sh_ewald;
269 ewtab = ic->tabq_coul_FDV0;
270 ewtabscale = ic->tabq_scale;
271 ewtabhalfspace = half/ewtabscale;
272 tab_ewald_F_lj = ic->tabq_vdw_F;
273 tab_ewald_V_lj = ic->tabq_vdw_V;
276 /* For Ewald/PME interactions we cannot easily apply the soft-core component to
277 * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
278 * can apply the small trick of subtracting the _reciprocal_ space contribution
279 * in this kernel, and instead apply the free energy interaction to the 1/r
280 * (standard coulomb) interaction.
282 * However, we cannot use this approach for switch-modified since we would then
283 * effectively end up evaluating a significantly different interaction here compared to the
284 * normal (non-free-energy) kernels, either by applying a cutoff at a different
285 * position than what the user requested, or by switching different
286 * things (1/r rather than short-range Ewald). For these settings, we just
287 * use the traditional short-range Ewald interaction in that case.
289 bConvertEwaldToCoulomb = (bEwald && (ic->coulomb_modifier != eintmodPOTSWITCH));
290 /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
291 * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
293 bConvertLJEwaldToLJ6 = (bEwaldLJ && (ic->vdw_modifier != eintmodPOTSWITCH));
295 /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
296 if (fr->cutoff_scheme == ecutsVERLET &&
297 ((bEwald && !bConvertEwaldToCoulomb) ||
298 (bEwaldLJ && !bConvertLJEwaldToLJ6)))
300 gmx_incons("Unimplemented non-bonded setup");
303 /* fix compiler warnings */
304 n1C = n1V = 0;
305 epsC = epsV = 0;
306 eps2C = eps2V = 0;
308 dvdl_coul = 0;
309 dvdl_vdw = 0;
311 /* Lambda factor for state A, 1-lambda*/
312 LFC[STATE_A] = one - lambda_coul;
313 LFV[STATE_A] = one - lambda_vdw;
315 /* Lambda factor for state B, lambda*/
316 LFC[STATE_B] = lambda_coul;
317 LFV[STATE_B] = lambda_vdw;
319 /*derivative of the lambda factor for state A and B */
320 DLF[STATE_A] = -1;
321 DLF[STATE_B] = 1;
323 for (i = 0; i < NSTATES; i++)
325 lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
326 dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
327 lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
328 dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
330 /* precalculate */
331 sigma2_def = std::cbrt(sigma6_def);
332 sigma2_min = std::cbrt(sigma6_min);
334 /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
336 do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
337 ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
338 if (do_tab)
340 tabscale = kernel_data->table_elec_vdw->scale;
341 VFtab = kernel_data->table_elec_vdw->data;
342 /* we always use the combined table here */
343 tab_elemsize = kernel_data->table_elec_vdw->stride;
346 for (n = 0; (n < nri); n++)
348 int npair_within_cutoff;
350 npair_within_cutoff = 0;
352 is3 = 3*shift[n];
353 shX = shiftvec[is3];
354 shY = shiftvec[is3+1];
355 shZ = shiftvec[is3+2];
356 nj0 = jindex[n];
357 nj1 = jindex[n+1];
358 ii = iinr[n];
359 ii3 = 3*ii;
360 ix = shX + x[ii3+0];
361 iy = shY + x[ii3+1];
362 iz = shZ + x[ii3+2];
363 iqA = facel*chargeA[ii];
364 iqB = facel*chargeB[ii];
365 ntiA = 2*ntype*typeA[ii];
366 ntiB = 2*ntype*typeB[ii];
367 vctot = 0;
368 vvtot = 0;
369 fix = 0;
370 fiy = 0;
371 fiz = 0;
373 for (k = nj0; (k < nj1); k++)
375 jnr = jjnr[k];
376 j3 = 3*jnr;
377 dx = ix - x[j3];
378 dy = iy - x[j3+1];
379 dz = iz - x[j3+2];
380 rsq = dx*dx + dy*dy + dz*dz;
382 if (bExactCutoffAll && rsq >= rcutoff_max2)
384 /* We save significant time by skipping all code below.
385 * Note that with soft-core interactions, the actual cut-off
386 * check might be different. But since the soft-core distance
387 * is always larger than r, checking on r here is safe.
389 continue;
391 npair_within_cutoff++;
393 if (rsq > 0)
395 /* Note that unlike in the nbnxn kernels, we do not need
396 * to clamp the value of rsq before taking the invsqrt
397 * to avoid NaN in the LJ calculation, since here we do
398 * not calculate LJ interactions when C6 and C12 are zero.
401 rinv = gmx::invsqrt(rsq);
402 r = rsq*rinv;
404 else
406 /* The force at r=0 is zero, because of symmetry.
407 * But note that the potential is in general non-zero,
408 * since the soft-cored r will be non-zero.
410 rinv = 0;
411 r = 0;
414 if (sc_r_power == six)
416 rpm2 = rsq*rsq; /* r4 */
417 rp = rpm2*rsq; /* r6 */
419 else if (sc_r_power == fourtyeight)
421 rp = rsq*rsq*rsq; /* r6 */
422 rp = rp*rp; /* r12 */
423 rp = rp*rp; /* r24 */
424 rp = rp*rp; /* r48 */
425 rpm2 = rp/rsq; /* r46 */
427 else
429 rp = std::pow(r, sc_r_power); /* not currently supported as input, but can handle it */
430 rpm2 = rp/rsq;
433 Fscal = 0;
435 qq[STATE_A] = iqA*chargeA[jnr];
436 qq[STATE_B] = iqB*chargeB[jnr];
438 tj[STATE_A] = ntiA+2*typeA[jnr];
439 tj[STATE_B] = ntiB+2*typeB[jnr];
441 if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
443 c6[STATE_A] = nbfp[tj[STATE_A]];
444 c6[STATE_B] = nbfp[tj[STATE_B]];
446 for (i = 0; i < NSTATES; i++)
448 c12[i] = nbfp[tj[i]+1];
449 if ((c6[i] > 0) && (c12[i] > 0))
451 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
452 sigma6[i] = half*c12[i]/c6[i];
453 sigma2[i] = std::cbrt(sigma6[i]);
454 /* should be able to get rid of cbrt call eventually. Will require agreement on
455 what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
456 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
458 sigma6[i] = sigma6_min;
459 sigma2[i] = sigma2_min;
462 else
464 sigma6[i] = sigma6_def;
465 sigma2[i] = sigma2_def;
467 if (sc_r_power == six)
469 sigma_pow[i] = sigma6[i];
471 else if (sc_r_power == fourtyeight)
473 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
474 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
475 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
477 else
478 { /* not really supported as input, but in here for testing the general case*/
479 sigma_pow[i] = std::pow(sigma2[i], sc_r_power/2);
483 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
484 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
486 alpha_vdw_eff = 0;
487 alpha_coul_eff = 0;
489 else
491 alpha_vdw_eff = alpha_vdw;
492 alpha_coul_eff = alpha_coul;
495 for (i = 0; i < NSTATES; i++)
497 FscalC[i] = 0;
498 FscalV[i] = 0;
499 Vcoul[i] = 0;
500 Vvdw[i] = 0;
502 /* Only spend time on A or B state if it is non-zero */
503 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
505 /* this section has to be inside the loop because of the dependence on sigma_pow */
506 rpinvC = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
507 rinvC = std::pow(rpinvC, one/sc_r_power);
508 rC = one/rinvC;
510 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
511 rinvV = std::pow(rpinvV, one/sc_r_power);
512 rV = one/rinvV;
514 if (do_tab)
516 rtC = rC*tabscale;
517 n0 = rtC;
518 epsC = rtC-n0;
519 eps2C = epsC*epsC;
520 n1C = tab_elemsize*n0;
522 rtV = rV*tabscale;
523 n0 = rtV;
524 epsV = rtV-n0;
525 eps2V = epsV*epsV;
526 n1V = tab_elemsize*n0;
529 /* Only process the coulomb interactions if we have charges,
530 * and if we either include all entries in the list (no cutoff
531 * used in the kernel), or if we are within the cutoff.
533 bComputeElecInteraction = !bExactElecCutoff ||
534 ( bConvertEwaldToCoulomb && r < rcoulomb) ||
535 (!bConvertEwaldToCoulomb && rC < rcoulomb);
537 if ( (qq[i] != 0) && bComputeElecInteraction)
539 switch (icoul)
541 case GMX_NBKERNEL_ELEC_COULOMB:
542 /* simple cutoff */
543 Vcoul[i] = qq[i]*rinvC;
544 FscalC[i] = Vcoul[i];
545 /* The shift for the Coulomb potential is stored in
546 * the RF parameter c_rf, which is 0 without shift.
548 Vcoul[i] -= qq[i]*ic->c_rf;
549 break;
551 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
552 /* reaction-field */
553 Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
554 FscalC[i] = qq[i]*(rinvC - two*krf*rC*rC);
555 break;
557 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
558 /* non-Ewald tabulated coulomb */
559 nnn = n1C;
560 Y = VFtab[nnn];
561 F = VFtab[nnn+1];
562 Geps = epsC*VFtab[nnn+2];
563 Heps2 = eps2C*VFtab[nnn+3];
564 Fp = F+Geps+Heps2;
565 VV = Y+epsC*Fp;
566 FF = Fp+Geps+two*Heps2;
567 Vcoul[i] = qq[i]*VV;
568 FscalC[i] = -qq[i]*tabscale*FF*rC;
569 break;
571 case GMX_NBKERNEL_ELEC_EWALD:
572 if (bConvertEwaldToCoulomb)
574 /* Ewald FEP is done only on the 1/r part */
575 Vcoul[i] = qq[i]*(rinvC-sh_ewald);
576 FscalC[i] = qq[i]*rinvC;
578 else
580 ewrt = rC*ewtabscale;
581 ewitab = static_cast<int>(ewrt);
582 eweps = ewrt-ewitab;
583 ewitab = 4*ewitab;
584 FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
585 rinvcorr = rinvC-sh_ewald;
586 Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
587 FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
589 break;
591 case GMX_NBKERNEL_ELEC_NONE:
592 FscalC[i] = zero;
593 Vcoul[i] = zero;
594 break;
596 default:
597 gmx_incons("Invalid icoul in free energy kernel");
598 break;
601 if (ic->coulomb_modifier == eintmodPOTSWITCH)
603 d = rC - ic->rcoulomb_switch;
604 d = (d > zero) ? d : zero;
605 d2 = d*d;
606 sw = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
607 dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
609 FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
610 Vcoul[i] *= sw;
612 FscalC[i] = (rC < rcoulomb) ? FscalC[i] : zero;
613 Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : zero;
617 /* Only process the VDW interactions if we have
618 * some non-zero parameters, and if we either
619 * include all entries in the list (no cutoff used
620 * in the kernel), or if we are within the cutoff.
622 bComputeVdwInteraction = !bExactVdwCutoff ||
623 ( bConvertLJEwaldToLJ6 && r < rvdw) ||
624 (!bConvertLJEwaldToLJ6 && rV < rvdw);
625 if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
627 switch (ivdw)
629 case GMX_NBKERNEL_VDW_LENNARDJONES:
630 /* cutoff LJ */
631 if (sc_r_power == six)
633 rinv6 = rpinvV;
635 else
637 rinv6 = rinvV*rinvV;
638 rinv6 = rinv6*rinv6*rinv6;
640 Vvdw6 = c6[i]*rinv6;
641 Vvdw12 = c12[i]*rinv6*rinv6;
643 Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
644 - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
645 FscalV[i] = Vvdw12 - Vvdw6;
646 break;
648 case GMX_NBKERNEL_VDW_BUCKINGHAM:
649 gmx_fatal(FARGS, "Buckingham free energy not supported.");
650 break;
652 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
653 /* Table LJ */
654 nnn = n1V+4;
655 /* dispersion */
656 Y = VFtab[nnn];
657 F = VFtab[nnn+1];
658 Geps = epsV*VFtab[nnn+2];
659 Heps2 = eps2V*VFtab[nnn+3];
660 Fp = F+Geps+Heps2;
661 VV = Y+epsV*Fp;
662 FF = Fp+Geps+two*Heps2;
663 Vvdw[i] += c6[i]*VV;
664 FscalV[i] -= c6[i]*tabscale*FF*rV;
666 /* repulsion */
667 Y = VFtab[nnn+4];
668 F = VFtab[nnn+5];
669 Geps = epsV*VFtab[nnn+6];
670 Heps2 = eps2V*VFtab[nnn+7];
671 Fp = F+Geps+Heps2;
672 VV = Y+epsV*Fp;
673 FF = Fp+Geps+two*Heps2;
674 Vvdw[i] += c12[i]*VV;
675 FscalV[i] -= c12[i]*tabscale*FF*rV;
676 break;
678 case GMX_NBKERNEL_VDW_LJEWALD:
679 if (sc_r_power == six)
681 rinv6 = rpinvV;
683 else
685 rinv6 = rinvV*rinvV;
686 rinv6 = rinv6*rinv6*rinv6;
688 c6grid = nbfp_grid[tj[i]];
690 if (bConvertLJEwaldToLJ6)
692 /* cutoff LJ */
693 Vvdw6 = c6[i]*rinv6;
694 Vvdw12 = c12[i]*rinv6*rinv6;
696 Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
697 - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
698 FscalV[i] = Vvdw12 - Vvdw6;
700 else
702 /* Normal LJ-PME */
703 ewcljrsq = ewclj2*rV*rV;
704 exponent = std::exp(-ewcljrsq);
705 poly = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
706 vvdw_disp = (c6[i]-c6grid*(one-poly))*rinv6;
707 vvdw_rep = c12[i]*rinv6*rinv6;
708 FscalV[i] = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
709 Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
711 break;
713 case GMX_NBKERNEL_VDW_NONE:
714 Vvdw[i] = zero;
715 FscalV[i] = zero;
716 break;
718 default:
719 gmx_incons("Invalid ivdw in free energy kernel");
720 break;
723 if (ic->vdw_modifier == eintmodPOTSWITCH)
725 d = rV - ic->rvdw_switch;
726 d = (d > zero) ? d : zero;
727 d2 = d*d;
728 sw = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
729 dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
731 FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
732 Vvdw[i] *= sw;
734 FscalV[i] = (rV < rvdw) ? FscalV[i] : zero;
735 Vvdw[i] = (rV < rvdw) ? Vvdw[i] : zero;
739 /* FscalC (and FscalV) now contain: dV/drC * rC
740 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
741 * Further down we first multiply by r^p-2 and then by
742 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
744 FscalC[i] *= rpinvC;
745 FscalV[i] *= rpinvV;
749 /* Assemble A and B states */
750 for (i = 0; i < NSTATES; i++)
752 vctot += LFC[i]*Vcoul[i];
753 vvtot += LFV[i]*Vvdw[i];
755 Fscal += LFC[i]*FscalC[i]*rpm2;
756 Fscal += LFV[i]*FscalV[i]*rpm2;
758 dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
759 dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
762 else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
764 /* For excluded pairs, which are only in this pair list when
765 * using the Verlet scheme, we don't use soft-core.
766 * The group scheme also doesn't soft-core for these.
767 * As there is no singularity, there is no need for soft-core.
769 VV = krf*rsq - crf;
770 FF = -two*krf;
772 if (ii == jnr)
774 VV *= half;
777 for (i = 0; i < NSTATES; i++)
779 vctot += LFC[i]*qq[i]*VV;
780 Fscal += LFC[i]*qq[i]*FF;
781 dvdl_coul += DLF[i]*qq[i]*VV;
785 if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
787 /* See comment in the preamble. When using Ewald interactions
788 * (unless we use a switch modifier) we subtract the reciprocal-space
789 * Ewald component here which made it possible to apply the free
790 * energy interaction to 1/r (vanilla coulomb short-range part)
791 * above. This gets us closer to the ideal case of applying
792 * the softcore to the entire electrostatic interaction,
793 * including the reciprocal-space component.
795 real v_lr, f_lr;
797 ewrt = r*ewtabscale;
798 ewitab = static_cast<int>(ewrt);
799 eweps = ewrt-ewitab;
800 ewitab = 4*ewitab;
801 f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];
802 v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
803 f_lr *= rinv;
805 /* Note that any possible Ewald shift has already been applied in
806 * the normal interaction part above.
809 if (ii == jnr)
811 /* If we get here, the i particle (ii) has itself (jnr)
812 * in its neighborlist. This can only happen with the Verlet
813 * scheme, and corresponds to a self-interaction that will
814 * occur twice. Scale it down by 50% to only include it once.
816 v_lr *= half;
819 for (i = 0; i < NSTATES; i++)
821 vctot -= LFC[i]*qq[i]*v_lr;
822 Fscal -= LFC[i]*qq[i]*f_lr;
823 dvdl_coul -= (DLF[i]*qq[i])*v_lr;
827 if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
829 /* See comment in the preamble. When using LJ-Ewald interactions
830 * (unless we use a switch modifier) we subtract the reciprocal-space
831 * Ewald component here which made it possible to apply the free
832 * energy interaction to r^-6 (vanilla LJ6 short-range part)
833 * above. This gets us closer to the ideal case of applying
834 * the softcore to the entire VdW interaction,
835 * including the reciprocal-space component.
837 /* We could also use the analytical form here
838 * iso a table, but that can cause issues for
839 * r close to 0 for non-interacting pairs.
841 real rs, frac, f_lr;
842 int ri;
844 rs = rsq*rinv*ewtabscale;
845 ri = static_cast<int>(rs);
846 frac = rs - ri;
847 f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
848 /* TODO: Currently the Ewald LJ table does not contain
849 * the factor 1/6, we should add this.
851 FF = f_lr*rinv/six;
852 VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
854 if (ii == jnr)
856 /* If we get here, the i particle (ii) has itself (jnr)
857 * in its neighborlist. This can only happen with the Verlet
858 * scheme, and corresponds to a self-interaction that will
859 * occur twice. Scale it down by 50% to only include it once.
861 VV *= half;
864 for (i = 0; i < NSTATES; i++)
866 c6grid = nbfp_grid[tj[i]];
867 vvtot += LFV[i]*c6grid*VV;
868 Fscal += LFV[i]*c6grid*FF;
869 dvdl_vdw += (DLF[i]*c6grid)*VV;
873 if (bDoForces)
875 tx = Fscal*dx;
876 ty = Fscal*dy;
877 tz = Fscal*dz;
878 fix = fix + tx;
879 fiy = fiy + ty;
880 fiz = fiz + tz;
881 /* OpenMP atomics are expensive, but this kernels is also
882 * expensive, so we can take this hit, instead of using
883 * thread-local output buffers and extra reduction.
885 * All the OpenMP regions in this file are trivial and should
886 * not throw, so no need for try/catch.
888 #pragma omp atomic
889 f[j3] -= tx;
890 #pragma omp atomic
891 f[j3+1] -= ty;
892 #pragma omp atomic
893 f[j3+2] -= tz;
897 /* The atomics below are expensive with many OpenMP threads.
898 * Here unperturbed i-particles will usually only have a few
899 * (perturbed) j-particles in the list. Thus with a buffered list
900 * we can skip a significant number of i-reductions with a check.
902 if (npair_within_cutoff > 0)
904 if (bDoForces)
906 #pragma omp atomic
907 f[ii3] += fix;
908 #pragma omp atomic
909 f[ii3+1] += fiy;
910 #pragma omp atomic
911 f[ii3+2] += fiz;
913 if (bDoShiftForces)
915 #pragma omp atomic
916 fshift[is3] += fix;
917 #pragma omp atomic
918 fshift[is3+1] += fiy;
919 #pragma omp atomic
920 fshift[is3+2] += fiz;
922 if (bDoPotential)
924 ggid = gid[n];
925 #pragma omp atomic
926 Vc[ggid] += vctot;
927 #pragma omp atomic
928 Vv[ggid] += vvtot;
933 #pragma omp atomic
934 dvdl[efptCOUL] += dvdl_coul;
935 #pragma omp atomic
936 dvdl[efptVDW] += dvdl_vdw;
938 /* Estimate flops, average for free energy stuff:
939 * 12 flops per outer iteration
940 * 150 flops per inner iteration
942 #pragma omp atomic
943 inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);