Free-energy support for LJ-PME
[gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
blobf09b0069ba637919e405a4577c404548dc6e6dbf
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
41 #include <math.h>
43 #include "vec.h"
44 #include "typedefs.h"
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
47 #include "nrnb.h"
48 #include "macros.h"
49 #include "nb_free_energy.h"
51 void
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
53 rvec * gmx_restrict xx,
54 rvec * gmx_restrict ff,
55 t_forcerec * gmx_restrict fr,
56 const t_mdatoms * gmx_restrict mdatoms,
57 nb_kernel_data_t * gmx_restrict kernel_data,
58 t_nrnb * gmx_restrict nrnb)
61 #define STATE_A 0
62 #define STATE_B 1
63 #define NSTATES 2
64 int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
65 real shX, shY, shZ;
66 real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67 real Vcoul[NSTATES], Vvdw[NSTATES];
68 real rinv6, r, rt, rtC, rtV;
69 real iqA, iqB;
70 real qq[NSTATES], vctot, krsq;
71 int ntiA, ntiB, tj[NSTATES];
72 real Vvdw6, Vvdw12, vvtot;
73 real ix, iy, iz, fix, fiy, fiz;
74 real dx, dy, dz, rsq, rinv;
75 real c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
76 real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77 double dvdl_coul, dvdl_vdw;
78 real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79 real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80 real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81 real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82 int do_tab, tab_elemsize;
83 int n0, n1C, n1V, nnn;
84 real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
85 int icoul, ivdw;
86 int nri;
87 const int * iinr;
88 const int * jindex;
89 const int * jjnr;
90 const int * shift;
91 const int * gid;
92 const int * typeA;
93 const int * typeB;
94 int ntype;
95 const real * shiftvec;
96 real dvdl_part;
97 real * fshift;
98 real tabscale = 0;
99 const real * VFtab = NULL;
100 const real * x;
101 real * f;
102 real facel, krf, crf;
103 const real * chargeA;
104 const real * chargeB;
105 real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106 real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
107 const real * nbfp, *nbfp_grid;
108 real * dvdl;
109 real * Vv;
110 real * Vc;
111 gmx_bool bDoForces, bDoShiftForces, bDoPotential;
112 real rcoulomb, sh_ewald;
113 real rvdw, sh_invrc6;
114 gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
115 real rcutoff_max2;
116 real rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
117 const real * tab_ewald_F;
118 const real * tab_ewald_V;
119 const real * tab_ewald_F_lj;
120 const real * tab_ewald_V_lj;
121 real tab_ewald_scale, tab_ewald_halfsp;
123 x = xx[0];
124 f = ff[0];
126 fshift = fr->fshift[0];
128 nri = nlist->nri;
129 iinr = nlist->iinr;
130 jindex = nlist->jindex;
131 jjnr = nlist->jjnr;
132 icoul = nlist->ielec;
133 ivdw = nlist->ivdw;
134 shift = nlist->shift;
135 gid = nlist->gid;
137 shiftvec = fr->shift_vec[0];
138 chargeA = mdatoms->chargeA;
139 chargeB = mdatoms->chargeB;
140 facel = fr->epsfac;
141 krf = fr->k_rf;
142 crf = fr->c_rf;
143 ewc_lj = fr->ewaldcoeff_lj;
144 Vc = kernel_data->energygrp_elec;
145 typeA = mdatoms->typeA;
146 typeB = mdatoms->typeB;
147 ntype = fr->ntype;
148 nbfp = fr->nbfp;
149 nbfp_grid = fr->ljpme_c6grid;
150 Vv = kernel_data->energygrp_vdw;
151 lambda_coul = kernel_data->lambda[efptCOUL];
152 lambda_vdw = kernel_data->lambda[efptVDW];
153 dvdl = kernel_data->dvdl;
154 alpha_coul = fr->sc_alphacoul;
155 alpha_vdw = fr->sc_alphavdw;
156 lam_power = fr->sc_power;
157 sc_r_power = fr->sc_r_power;
158 sigma6_def = fr->sc_sigma6_def;
159 sigma6_min = fr->sc_sigma6_min;
160 bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
161 bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
162 bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
164 rcoulomb = fr->rcoulomb;
165 sh_ewald = fr->ic->sh_ewald;
166 rvdw = fr->rvdw;
167 sh_invrc6 = fr->ic->sh_invrc6;
169 /* Ewald (PME) reciprocal force and energy quadratic spline tables */
170 tab_ewald_F = fr->ic->tabq_coul_F;
171 tab_ewald_V = fr->ic->tabq_coul_V;
172 tab_ewald_scale = fr->ic->tabq_scale;
173 tab_ewald_F_lj = fr->ic->tabq_vdw_F;
174 tab_ewald_V_lj = fr->ic->tabq_vdw_V;
175 tab_ewald_halfsp = 0.5/tab_ewald_scale;
177 if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
179 rcutoff = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
180 rcutoff2 = rcutoff*rcutoff;
181 rswitch = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
182 d = rcutoff-rswitch;
183 swV3 = -10.0/(d*d*d);
184 swV4 = 15.0/(d*d*d*d);
185 swV5 = -6.0/(d*d*d*d*d);
186 swF2 = -30.0/(d*d*d);
187 swF3 = 60.0/(d*d*d*d);
188 swF4 = -30.0/(d*d*d*d*d);
190 else
192 /* Stupid compilers dont realize these variables will not be used */
193 rswitch = 0.0;
194 swV3 = 0.0;
195 swV4 = 0.0;
196 swV5 = 0.0;
197 swF2 = 0.0;
198 swF3 = 0.0;
199 swF4 = 0.0;
202 if (fr->cutoff_scheme == ecutsVERLET)
204 const interaction_const_t *ic;
206 ic = fr->ic;
207 if (EVDW_PME(ic->vdwtype))
209 ivdw = GMX_NBKERNEL_VDW_LJEWALD;
211 else
213 ivdw = GMX_NBKERNEL_VDW_LENNARDJONES;
216 if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
218 icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
220 else if (EEL_PME_EWALD(ic->eeltype))
222 icoul = GMX_NBKERNEL_ELEC_EWALD;
224 else
226 gmx_incons("Unsupported eeltype with Verlet and free-energy");
229 bExactElecCutoff = TRUE;
230 bExactVdwCutoff = TRUE;
232 else
234 bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
235 bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE);
238 bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
239 rcutoff_max2 = max(fr->rcoulomb, fr->rvdw);
240 rcutoff_max2 = rcutoff_max2*rcutoff_max2;
242 bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD);
244 /* fix compiler warnings */
245 nj1 = 0;
246 n1C = n1V = 0;
247 epsC = epsV = 0;
248 eps2C = eps2V = 0;
250 dvdl_coul = 0;
251 dvdl_vdw = 0;
253 /* Lambda factor for state A, 1-lambda*/
254 LFC[STATE_A] = 1.0 - lambda_coul;
255 LFV[STATE_A] = 1.0 - lambda_vdw;
257 /* Lambda factor for state B, lambda*/
258 LFC[STATE_B] = lambda_coul;
259 LFV[STATE_B] = lambda_vdw;
261 /*derivative of the lambda factor for state A and B */
262 DLF[STATE_A] = -1;
263 DLF[STATE_B] = 1;
265 for (i = 0; i < NSTATES; i++)
267 lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
268 dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
269 lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
270 dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
272 /* precalculate */
273 sigma2_def = pow(sigma6_def, 1.0/3.0);
274 sigma2_min = pow(sigma6_min, 1.0/3.0);
276 /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
278 do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
279 ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
280 if (do_tab)
282 tabscale = kernel_data->table_elec_vdw->scale;
283 VFtab = kernel_data->table_elec_vdw->data;
284 /* we always use the combined table here */
285 tab_elemsize = 12;
288 for (n = 0; (n < nri); n++)
290 int npair_within_cutoff;
292 npair_within_cutoff = 0;
294 is3 = 3*shift[n];
295 shX = shiftvec[is3];
296 shY = shiftvec[is3+1];
297 shZ = shiftvec[is3+2];
298 nj0 = jindex[n];
299 nj1 = jindex[n+1];
300 ii = iinr[n];
301 ii3 = 3*ii;
302 ix = shX + x[ii3+0];
303 iy = shY + x[ii3+1];
304 iz = shZ + x[ii3+2];
305 iqA = facel*chargeA[ii];
306 iqB = facel*chargeB[ii];
307 ntiA = 2*ntype*typeA[ii];
308 ntiB = 2*ntype*typeB[ii];
309 vctot = 0;
310 vvtot = 0;
311 fix = 0;
312 fiy = 0;
313 fiz = 0;
315 for (k = nj0; (k < nj1); k++)
317 jnr = jjnr[k];
318 j3 = 3*jnr;
319 dx = ix - x[j3];
320 dy = iy - x[j3+1];
321 dz = iz - x[j3+2];
322 rsq = dx*dx + dy*dy + dz*dz;
324 if (bExactCutoffAll && rsq >= rcutoff_max2)
326 /* We save significant time by skipping all code below.
327 * Note that with soft-core interactions, the actual cut-off
328 * check might be different. But since the soft-core distance
329 * is always larger than r, checking on r here is safe.
331 continue;
333 npair_within_cutoff++;
335 if (rsq > 0)
337 rinv = gmx_invsqrt(rsq);
338 r = rsq*rinv;
340 else
342 /* The force at r=0 is zero, because of symmetry.
343 * But note that the potential is in general non-zero,
344 * since the soft-cored r will be non-zero.
346 rinv = 0;
347 r = 0;
350 if (sc_r_power == 6.0)
352 rpm2 = rsq*rsq; /* r4 */
353 rp = rpm2*rsq; /* r6 */
355 else if (sc_r_power == 48.0)
357 rp = rsq*rsq*rsq; /* r6 */
358 rp = rp*rp; /* r12 */
359 rp = rp*rp; /* r24 */
360 rp = rp*rp; /* r48 */
361 rpm2 = rp/rsq; /* r46 */
363 else
365 rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */
366 rpm2 = rp/rsq;
369 Fscal = 0;
371 qq[STATE_A] = iqA*chargeA[jnr];
372 qq[STATE_B] = iqB*chargeB[jnr];
374 tj[STATE_A] = ntiA+2*typeA[jnr];
375 tj[STATE_B] = ntiB+2*typeB[jnr];
377 if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
379 c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
380 c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
383 if (nlist->excl_fep == NULL || nlist->excl_fep[k])
385 c6[STATE_A] = nbfp[tj[STATE_A]];
386 c6[STATE_B] = nbfp[tj[STATE_B]];
388 for (i = 0; i < NSTATES; i++)
390 c12[i] = nbfp[tj[i]+1];
391 if ((c6[i] > 0) && (c12[i] > 0))
393 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
394 sigma6[i] = 0.5*c12[i]/c6[i];
395 sigma2[i] = pow(sigma6[i], 1.0/3.0);
396 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
397 what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
398 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
400 sigma6[i] = sigma6_min;
401 sigma2[i] = sigma2_min;
404 else
406 sigma6[i] = sigma6_def;
407 sigma2[i] = sigma2_def;
409 if (sc_r_power == 6.0)
411 sigma_pow[i] = sigma6[i];
412 sigma_powm2[i] = sigma6[i]/sigma2[i];
414 else if (sc_r_power == 48.0)
416 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
417 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
418 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
419 sigma_powm2[i] = sigma_pow[i]/sigma2[i];
421 else
422 { /* not really supported as input, but in here for testing the general case*/
423 sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
424 sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
428 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
429 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
431 alpha_vdw_eff = 0;
432 alpha_coul_eff = 0;
434 else
436 alpha_vdw_eff = alpha_vdw;
437 alpha_coul_eff = alpha_coul;
440 for (i = 0; i < NSTATES; i++)
442 FscalC[i] = 0;
443 FscalV[i] = 0;
444 Vcoul[i] = 0;
445 Vvdw[i] = 0;
447 /* Only spend time on A or B state if it is non-zero */
448 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
450 /* this section has to be inside the loop because of the dependence on sigma_pow */
451 rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
452 rinvC = pow(rpinvC, 1.0/sc_r_power);
453 rC = 1.0/rinvC;
455 rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
456 rinvV = pow(rpinvV, 1.0/sc_r_power);
457 rV = 1.0/rinvV;
459 if (do_tab)
461 rtC = rC*tabscale;
462 n0 = rtC;
463 epsC = rtC-n0;
464 eps2C = epsC*epsC;
465 n1C = tab_elemsize*n0;
467 rtV = rV*tabscale;
468 n0 = rtV;
469 epsV = rtV-n0;
470 eps2V = epsV*epsV;
471 n1V = tab_elemsize*n0;
474 /* With Ewald and soft-core we should put the cut-off on r,
475 * not on the soft-cored rC, as the real-space and
476 * reciprocal space contributions should (almost) cancel.
478 if (qq[i] != 0 &&
479 !(bExactElecCutoff &&
480 ((!bEwald && rC >= rcoulomb) ||
481 (bEwald && r >= rcoulomb))))
483 switch (icoul)
485 case GMX_NBKERNEL_ELEC_COULOMB:
486 /* simple cutoff */
487 Vcoul[i] = qq[i]*rinvC;
488 FscalC[i] = Vcoul[i];
489 break;
491 case GMX_NBKERNEL_ELEC_EWALD:
492 /* Ewald FEP is done only on the 1/r part */
493 Vcoul[i] = qq[i]*(rinvC - sh_ewald);
494 FscalC[i] = Vcoul[i];
495 break;
497 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
498 /* reaction-field */
499 Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
500 FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC);
501 break;
503 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
504 /* non-Ewald tabulated coulomb */
505 nnn = n1C;
506 Y = VFtab[nnn];
507 F = VFtab[nnn+1];
508 Geps = epsC*VFtab[nnn+2];
509 Heps2 = eps2C*VFtab[nnn+3];
510 Fp = F+Geps+Heps2;
511 VV = Y+epsC*Fp;
512 FF = Fp+Geps+2.0*Heps2;
513 Vcoul[i] = qq[i]*VV;
514 FscalC[i] = -qq[i]*tabscale*FF*rC;
515 break;
517 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
518 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
519 break;
521 case GMX_NBKERNEL_ELEC_NONE:
522 FscalC[i] = 0.0;
523 Vcoul[i] = 0.0;
524 break;
526 default:
527 gmx_incons("Invalid icoul in free energy kernel");
528 break;
531 if (fr->coulomb_modifier == eintmodPOTSWITCH)
533 d = rC-rswitch;
534 d = (d > 0.0) ? d : 0.0;
535 d2 = d*d;
536 sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
537 dsw = d2*(swF2+d*(swF3+d*swF4));
539 Vcoul[i] *= sw;
540 FscalC[i] = FscalC[i]*sw + Vcoul[i]*dsw;
544 if ((c6[i] != 0 || c12[i] != 0) &&
545 !(bExactVdwCutoff &&
546 ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
547 (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
549 switch (ivdw)
551 case GMX_NBKERNEL_VDW_LENNARDJONES:
552 case GMX_NBKERNEL_VDW_LJEWALD:
553 /* cutoff LJ */
554 if (sc_r_power == 6.0)
556 rinv6 = rpinvV;
558 else
560 rinv6 = pow(rinvV, 6.0);
562 Vvdw6 = c6[i]*rinv6;
563 Vvdw12 = c12[i]*rinv6*rinv6;
564 if (fr->vdw_modifier == eintmodPOTSHIFT)
566 Vvdw[i] = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
567 -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
569 else
571 Vvdw[i] = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
573 FscalV[i] = Vvdw12 - Vvdw6;
574 break;
576 case GMX_NBKERNEL_VDW_BUCKINGHAM:
577 gmx_fatal(FARGS, "Buckingham free energy not supported.");
578 break;
580 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
581 /* Table LJ */
582 nnn = n1V+4;
583 /* dispersion */
584 Y = VFtab[nnn];
585 F = VFtab[nnn+1];
586 Geps = epsV*VFtab[nnn+2];
587 Heps2 = eps2V*VFtab[nnn+3];
588 Fp = F+Geps+Heps2;
589 VV = Y+epsV*Fp;
590 FF = Fp+Geps+2.0*Heps2;
591 Vvdw[i] += c6[i]*VV;
592 FscalV[i] -= c6[i]*tabscale*FF*rV;
594 /* repulsion */
595 Y = VFtab[nnn+4];
596 F = VFtab[nnn+5];
597 Geps = epsV*VFtab[nnn+6];
598 Heps2 = eps2V*VFtab[nnn+7];
599 Fp = F+Geps+Heps2;
600 VV = Y+epsV*Fp;
601 FF = Fp+Geps+2.0*Heps2;
602 Vvdw[i] += c12[i]*VV;
603 FscalV[i] -= c12[i]*tabscale*FF*rV;
604 break;
606 case GMX_NBKERNEL_VDW_NONE:
607 Vvdw[i] = 0.0;
608 FscalV[i] = 0.0;
609 break;
611 default:
612 gmx_incons("Invalid ivdw in free energy kernel");
613 break;
616 if (fr->vdw_modifier == eintmodPOTSWITCH)
618 d = rV-rswitch;
619 d = (d > 0.0) ? d : 0.0;
620 d2 = d*d;
621 sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
622 dsw = d2*(swF2+d*(swF3+d*swF4));
624 Vvdw[i] *= sw;
625 FscalV[i] = FscalV[i]*sw + Vvdw[i]*dsw;
627 FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0;
628 Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0;
632 /* FscalC (and FscalV) now contain: dV/drC * rC
633 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
634 * Further down we first multiply by r^p-2 and then by
635 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
637 FscalC[i] *= rpinvC;
638 FscalV[i] *= rpinvV;
642 /* Assemble A and B states */
643 for (i = 0; i < NSTATES; i++)
645 vctot += LFC[i]*Vcoul[i];
646 vvtot += LFV[i]*Vvdw[i];
648 Fscal += LFC[i]*FscalC[i]*rpm2;
649 Fscal += LFV[i]*FscalV[i]*rpm2;
651 dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
652 dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
655 else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
657 /* For excluded pairs, which are only in this pair list when
658 * using the Verlet scheme, we don't use soft-core.
659 * The group scheme also doesn't soft-core for these.
660 * As there is no singularity, there is no need for soft-core.
662 VV = krf*rsq - crf;
663 FF = -2.0*krf;
665 if (ii == jnr)
667 VV *= 0.5;
670 for (i = 0; i < NSTATES; i++)
672 vctot += LFC[i]*qq[i]*VV;
673 Fscal += LFC[i]*qq[i]*FF;
674 dvdl_coul += DLF[i]*qq[i]*VV;
678 if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
679 !(bExactElecCutoff && r >= rcoulomb))
681 /* Because we compute the soft-core normally,
682 * we have to remove the Ewald short range portion.
683 * Done outside of the states loop because this part
684 * doesn't depend on the scaled R.
686 real rs, frac, f_lr;
687 int ri;
689 rs = rsq*rinv*tab_ewald_scale;
690 ri = (int)rs;
691 frac = rs - ri;
692 f_lr = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
693 FF = f_lr*rinv;
694 VV = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
696 if (ii == jnr)
698 VV *= 0.5;
701 for (i = 0; i < NSTATES; i++)
703 vctot -= LFC[i]*qq[i]*VV;
704 Fscal -= LFC[i]*qq[i]*FF;
705 dvdl_coul -= (DLF[i]*qq[i])*VV;
709 if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
710 !(bExactVdwCutoff && r >= rvdw))
712 real rs, frac, f_lr;
713 int ri;
715 rs = rsq*rinv*tab_ewald_scale;
716 ri = (int)rs;
717 frac = rs - ri;
718 f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
719 FF = f_lr*rinv;
720 VV = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
721 for (i = 0; i < NSTATES; i++)
723 vvtot += LFV[i]*c6grid[i]*VV*(1.0/6.0);
724 Fscal += LFV[i]*c6grid[i]*FF*(1.0/6.0);
725 dvdl_vdw += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
730 if (bDoForces)
732 tx = Fscal*dx;
733 ty = Fscal*dy;
734 tz = Fscal*dz;
735 fix = fix + tx;
736 fiy = fiy + ty;
737 fiz = fiz + tz;
738 /* OpenMP atomics are expensive, but this kernels is also
739 * expensive, so we can take this hit, instead of using
740 * thread-local output buffers and extra reduction.
742 #pragma omp atomic
743 f[j3] -= tx;
744 #pragma omp atomic
745 f[j3+1] -= ty;
746 #pragma omp atomic
747 f[j3+2] -= tz;
751 /* The atomics below are expensive with many OpenMP threads.
752 * Here unperturbed i-particles will usually only have a few
753 * (perturbed) j-particles in the list. Thus with a buffered list
754 * we can skip a significant number of i-reductions with a check.
756 if (npair_within_cutoff > 0)
758 if (bDoForces)
760 #pragma omp atomic
761 f[ii3] += fix;
762 #pragma omp atomic
763 f[ii3+1] += fiy;
764 #pragma omp atomic
765 f[ii3+2] += fiz;
767 if (bDoShiftForces)
769 #pragma omp atomic
770 fshift[is3] += fix;
771 #pragma omp atomic
772 fshift[is3+1] += fiy;
773 #pragma omp atomic
774 fshift[is3+2] += fiz;
776 if (bDoPotential)
778 ggid = gid[n];
779 #pragma omp atomic
780 Vc[ggid] += vctot;
781 #pragma omp atomic
782 Vv[ggid] += vvtot;
787 #pragma omp atomic
788 dvdl[efptCOUL] += dvdl_coul;
789 #pragma omp atomic
790 dvdl[efptVDW] += dvdl_vdw;
792 /* Estimate flops, average for free energy stuff:
793 * 12 flops per outer iteration
794 * 150 flops per inner iteration
796 inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
799 real
800 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
801 real tabscale, real *vftab,
802 real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
803 real LFC[2], real LFV[2], real DLF[2],
804 real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
805 real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
806 real *velectot, real *vvdwtot, real *dvdl)
808 real r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
809 real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
810 real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
811 real rpinv, r_coul, r_vdw, velecsum, vvdwsum;
812 real fscal_vdw[2], fscal_elec[2];
813 real velec[2], vvdw[2];
814 int i, ntab;
816 qq[0] = qqA;
817 qq[1] = qqB;
818 c6[0] = c6A;
819 c6[1] = c6B;
820 c12[0] = c12A;
821 c12[1] = c12B;
823 if (sc_r_power == 6.0)
825 rpm2 = r2*r2; /* r4 */
826 rp = rpm2*r2; /* r6 */
828 else if (sc_r_power == 48.0)
830 rp = r2*r2*r2; /* r6 */
831 rp = rp*rp; /* r12 */
832 rp = rp*rp; /* r24 */
833 rp = rp*rp; /* r48 */
834 rpm2 = rp/r2; /* r46 */
836 else
838 rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */
839 rpm2 = rp/r2;
842 /* Loop over state A(0) and B(1) */
843 for (i = 0; i < 2; i++)
845 if ((c6[i] > 0) && (c12[i] > 0))
847 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
848 * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
850 sigma6[i] = 0.5*c12[i]/c6[i];
851 sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0);
852 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
853 what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
854 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
856 sigma6[i] = sigma6_min;
857 sigma2[i] = sigma2_min;
860 else
862 sigma6[i] = sigma6_def;
863 sigma2[i] = sigma2_def;
865 if (sc_r_power == 6.0)
867 sigma_pow[i] = sigma6[i];
868 sigma_powm2[i] = sigma6[i]/sigma2[i];
870 else if (sc_r_power == 48.0)
872 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
873 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
874 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
875 sigma_powm2[i] = sigma_pow[i]/sigma2[i];
877 else
878 { /* not really supported as input, but in here for testing the general case*/
879 sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
880 sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
884 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
885 if ((c12[0] > 0) && (c12[1] > 0))
887 alpha_vdw_eff = 0;
888 alpha_coul_eff = 0;
890 else
892 alpha_vdw_eff = alpha_vdw;
893 alpha_coul_eff = alpha_coul;
896 /* Loop over A and B states again */
897 for (i = 0; i < 2; i++)
899 fscal_elec[i] = 0;
900 fscal_vdw[i] = 0;
901 velec[i] = 0;
902 vvdw[i] = 0;
904 /* Only spend time on A or B state if it is non-zero */
905 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
907 /* Coulomb */
908 rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
909 r_coul = pow(rpinv, -1.0/sc_r_power);
911 /* Electrostatics table lookup data */
912 rtab = r_coul*tabscale;
913 ntab = rtab;
914 eps = rtab-ntab;
915 eps2 = eps*eps;
916 ntab = 12*ntab;
917 /* Electrostatics */
918 Y = vftab[ntab];
919 F = vftab[ntab+1];
920 Geps = eps*vftab[ntab+2];
921 Heps2 = eps2*vftab[ntab+3];
922 Fp = F+Geps+Heps2;
923 VV = Y+eps*Fp;
924 FF = Fp+Geps+2.0*Heps2;
925 velec[i] = qq[i]*VV;
926 fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
928 /* Vdw */
929 rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
930 r_vdw = pow(rpinv, -1.0/sc_r_power);
931 /* Vdw table lookup data */
932 rtab = r_vdw*tabscale;
933 ntab = rtab;
934 eps = rtab-ntab;
935 eps2 = eps*eps;
936 ntab = 12*ntab;
937 /* Dispersion */
938 Y = vftab[ntab+4];
939 F = vftab[ntab+5];
940 Geps = eps*vftab[ntab+6];
941 Heps2 = eps2*vftab[ntab+7];
942 Fp = F+Geps+Heps2;
943 VV = Y+eps*Fp;
944 FF = Fp+Geps+2.0*Heps2;
945 vvdw[i] = c6[i]*VV;
946 fscal_vdw[i] = -c6[i]*FF;
948 /* Repulsion */
949 Y = vftab[ntab+8];
950 F = vftab[ntab+9];
951 Geps = eps*vftab[ntab+10];
952 Heps2 = eps2*vftab[ntab+11];
953 Fp = F+Geps+Heps2;
954 VV = Y+eps*Fp;
955 FF = Fp+Geps+2.0*Heps2;
956 vvdw[i] += c12[i]*VV;
957 fscal_vdw[i] -= c12[i]*FF;
958 fscal_vdw[i] *= r_vdw*rpinv*tabscale;
961 /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
962 /* Assemble A and B states */
963 velecsum = 0;
964 vvdwsum = 0;
965 dvdl_coul = 0;
966 dvdl_vdw = 0;
967 fscal = 0;
968 for (i = 0; i < 2; i++)
970 velecsum += LFC[i]*velec[i];
971 vvdwsum += LFV[i]*vvdw[i];
973 fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
975 dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
976 dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
979 dvdl[efptCOUL] += dvdl_coul;
980 dvdl[efptVDW] += dvdl_vdw;
982 *velectot = velecsum;
983 *vvdwtot = vvdwsum;
985 return fscal;