2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
49 #include "nb_free_energy.h"
52 gmx_nb_free_energy_kernel(const t_nblist
* gmx_restrict nlist
,
53 rvec
* gmx_restrict xx
,
54 rvec
* gmx_restrict ff
,
55 t_forcerec
* gmx_restrict fr
,
56 const t_mdatoms
* gmx_restrict mdatoms
,
57 nb_kernel_data_t
* gmx_restrict kernel_data
,
58 t_nrnb
* gmx_restrict nrnb
)
64 int i
, j
, n
, ii
, is3
, ii3
, k
, nj0
, nj1
, jnr
, j3
, ggid
;
66 real Fscal
, FscalC
[NSTATES
], FscalV
[NSTATES
], tx
, ty
, tz
;
67 real Vcoul
[NSTATES
], Vvdw
[NSTATES
];
68 real rinv6
, r
, rt
, rtC
, rtV
;
70 real qq
[NSTATES
], vctot
, krsq
;
71 int ntiA
, ntiB
, tj
[NSTATES
];
72 real Vvdw6
, Vvdw12
, vvtot
;
73 real ix
, iy
, iz
, fix
, fiy
, fiz
;
74 real dx
, dy
, dz
, rsq
, rinv
;
75 real c6
[NSTATES
], c12
[NSTATES
], c6grid
[NSTATES
];
76 real LFC
[NSTATES
], LFV
[NSTATES
], DLF
[NSTATES
];
77 double dvdl_coul
, dvdl_vdw
;
78 real lfac_coul
[NSTATES
], dlfac_coul
[NSTATES
], lfac_vdw
[NSTATES
], dlfac_vdw
[NSTATES
];
79 real sigma6
[NSTATES
], alpha_vdw_eff
, alpha_coul_eff
, sigma2_def
, sigma2_min
;
80 real rp
, rpm2
, rC
, rV
, rinvC
, rpinvC
, rinvV
, rpinvV
;
81 real sigma2
[NSTATES
], sigma_pow
[NSTATES
], sigma_powm2
[NSTATES
], rs
, rs2
;
82 int do_tab
, tab_elemsize
;
83 int n0
, n1C
, n1V
, nnn
;
84 real Y
, F
, G
, H
, Fp
, Geps
, Heps2
, epsC
, eps2C
, epsV
, eps2V
, VV
, FF
;
95 const real
* shiftvec
;
99 const real
* VFtab
= NULL
;
102 real facel
, krf
, crf
;
103 const real
* chargeA
;
104 const real
* chargeB
;
105 real sigma6_min
, sigma6_def
, lam_power
, sc_power
, sc_r_power
;
106 real alpha_coul
, alpha_vdw
, lambda_coul
, lambda_vdw
, ewc_lj
;
107 const real
* nbfp
, *nbfp_grid
;
111 gmx_bool bDoForces
, bDoShiftForces
, bDoPotential
;
112 real rcoulomb
, sh_ewald
;
113 real rvdw
, sh_invrc6
;
114 gmx_bool bExactElecCutoff
, bExactVdwCutoff
, bExactCutoffAll
, bEwald
;
116 real rcutoff
, rcutoff2
, rswitch
, d
, d2
, swV3
, swV4
, swV5
, swF2
, swF3
, swF4
, sw
, dsw
, rinvcorr
;
117 const real
* tab_ewald_F
;
118 const real
* tab_ewald_V
;
119 const real
* tab_ewald_F_lj
;
120 const real
* tab_ewald_V_lj
;
121 real tab_ewald_scale
, tab_ewald_halfsp
;
126 fshift
= fr
->fshift
[0];
130 jindex
= nlist
->jindex
;
132 icoul
= nlist
->ielec
;
134 shift
= nlist
->shift
;
137 shiftvec
= fr
->shift_vec
[0];
138 chargeA
= mdatoms
->chargeA
;
139 chargeB
= mdatoms
->chargeB
;
143 ewc_lj
= fr
->ewaldcoeff_lj
;
144 Vc
= kernel_data
->energygrp_elec
;
145 typeA
= mdatoms
->typeA
;
146 typeB
= mdatoms
->typeB
;
149 nbfp_grid
= fr
->ljpme_c6grid
;
150 Vv
= kernel_data
->energygrp_vdw
;
151 lambda_coul
= kernel_data
->lambda
[efptCOUL
];
152 lambda_vdw
= kernel_data
->lambda
[efptVDW
];
153 dvdl
= kernel_data
->dvdl
;
154 alpha_coul
= fr
->sc_alphacoul
;
155 alpha_vdw
= fr
->sc_alphavdw
;
156 lam_power
= fr
->sc_power
;
157 sc_r_power
= fr
->sc_r_power
;
158 sigma6_def
= fr
->sc_sigma6_def
;
159 sigma6_min
= fr
->sc_sigma6_min
;
160 bDoForces
= kernel_data
->flags
& GMX_NONBONDED_DO_FORCE
;
161 bDoShiftForces
= kernel_data
->flags
& GMX_NONBONDED_DO_SHIFTFORCE
;
162 bDoPotential
= kernel_data
->flags
& GMX_NONBONDED_DO_POTENTIAL
;
164 rcoulomb
= fr
->rcoulomb
;
165 sh_ewald
= fr
->ic
->sh_ewald
;
167 sh_invrc6
= fr
->ic
->sh_invrc6
;
169 /* Ewald (PME) reciprocal force and energy quadratic spline tables */
170 tab_ewald_F
= fr
->ic
->tabq_coul_F
;
171 tab_ewald_V
= fr
->ic
->tabq_coul_V
;
172 tab_ewald_scale
= fr
->ic
->tabq_scale
;
173 tab_ewald_F_lj
= fr
->ic
->tabq_vdw_F
;
174 tab_ewald_V_lj
= fr
->ic
->tabq_vdw_V
;
175 tab_ewald_halfsp
= 0.5/tab_ewald_scale
;
177 if (fr
->coulomb_modifier
== eintmodPOTSWITCH
|| fr
->vdw_modifier
== eintmodPOTSWITCH
)
179 rcutoff
= (fr
->coulomb_modifier
== eintmodPOTSWITCH
) ? fr
->rcoulomb
: fr
->rvdw
;
180 rcutoff2
= rcutoff
*rcutoff
;
181 rswitch
= (fr
->coulomb_modifier
== eintmodPOTSWITCH
) ? fr
->rcoulomb_switch
: fr
->rvdw_switch
;
183 swV3
= -10.0/(d
*d
*d
);
184 swV4
= 15.0/(d
*d
*d
*d
);
185 swV5
= -6.0/(d
*d
*d
*d
*d
);
186 swF2
= -30.0/(d
*d
*d
);
187 swF3
= 60.0/(d
*d
*d
*d
);
188 swF4
= -30.0/(d
*d
*d
*d
*d
);
192 /* Stupid compilers dont realize these variables will not be used */
202 if (fr
->cutoff_scheme
== ecutsVERLET
)
204 const interaction_const_t
*ic
;
207 if (EVDW_PME(ic
->vdwtype
))
209 ivdw
= GMX_NBKERNEL_VDW_LJEWALD
;
213 ivdw
= GMX_NBKERNEL_VDW_LENNARDJONES
;
216 if (ic
->eeltype
== eelCUT
|| EEL_RF(ic
->eeltype
))
218 icoul
= GMX_NBKERNEL_ELEC_REACTIONFIELD
;
220 else if (EEL_PME_EWALD(ic
->eeltype
))
222 icoul
= GMX_NBKERNEL_ELEC_EWALD
;
226 gmx_incons("Unsupported eeltype with Verlet and free-energy");
229 bExactElecCutoff
= TRUE
;
230 bExactVdwCutoff
= TRUE
;
234 bExactElecCutoff
= (fr
->coulomb_modifier
!= eintmodNONE
) || fr
->eeltype
== eelRF_ZERO
;
235 bExactVdwCutoff
= (fr
->vdw_modifier
!= eintmodNONE
);
238 bExactCutoffAll
= (bExactElecCutoff
&& bExactVdwCutoff
);
239 rcutoff_max2
= max(fr
->rcoulomb
, fr
->rvdw
);
240 rcutoff_max2
= rcutoff_max2
*rcutoff_max2
;
242 bEwald
= (icoul
== GMX_NBKERNEL_ELEC_EWALD
);
244 /* fix compiler warnings */
253 /* Lambda factor for state A, 1-lambda*/
254 LFC
[STATE_A
] = 1.0 - lambda_coul
;
255 LFV
[STATE_A
] = 1.0 - lambda_vdw
;
257 /* Lambda factor for state B, lambda*/
258 LFC
[STATE_B
] = lambda_coul
;
259 LFV
[STATE_B
] = lambda_vdw
;
261 /*derivative of the lambda factor for state A and B */
265 for (i
= 0; i
< NSTATES
; i
++)
267 lfac_coul
[i
] = (lam_power
== 2 ? (1-LFC
[i
])*(1-LFC
[i
]) : (1-LFC
[i
]));
268 dlfac_coul
[i
] = DLF
[i
]*lam_power
/sc_r_power
*(lam_power
== 2 ? (1-LFC
[i
]) : 1);
269 lfac_vdw
[i
] = (lam_power
== 2 ? (1-LFV
[i
])*(1-LFV
[i
]) : (1-LFV
[i
]));
270 dlfac_vdw
[i
] = DLF
[i
]*lam_power
/sc_r_power
*(lam_power
== 2 ? (1-LFV
[i
]) : 1);
273 sigma2_def
= pow(sigma6_def
, 1.0/3.0);
274 sigma2_min
= pow(sigma6_min
, 1.0/3.0);
276 /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
278 do_tab
= (icoul
== GMX_NBKERNEL_ELEC_CUBICSPLINETABLE
||
279 ivdw
== GMX_NBKERNEL_VDW_CUBICSPLINETABLE
);
282 tabscale
= kernel_data
->table_elec_vdw
->scale
;
283 VFtab
= kernel_data
->table_elec_vdw
->data
;
284 /* we always use the combined table here */
288 for (n
= 0; (n
< nri
); n
++)
290 int npair_within_cutoff
;
292 npair_within_cutoff
= 0;
296 shY
= shiftvec
[is3
+1];
297 shZ
= shiftvec
[is3
+2];
305 iqA
= facel
*chargeA
[ii
];
306 iqB
= facel
*chargeB
[ii
];
307 ntiA
= 2*ntype
*typeA
[ii
];
308 ntiB
= 2*ntype
*typeB
[ii
];
315 for (k
= nj0
; (k
< nj1
); k
++)
322 rsq
= dx
*dx
+ dy
*dy
+ dz
*dz
;
324 if (bExactCutoffAll
&& rsq
>= rcutoff_max2
)
326 /* We save significant time by skipping all code below.
327 * Note that with soft-core interactions, the actual cut-off
328 * check might be different. But since the soft-core distance
329 * is always larger than r, checking on r here is safe.
333 npair_within_cutoff
++;
337 rinv
= gmx_invsqrt(rsq
);
342 /* The force at r=0 is zero, because of symmetry.
343 * But note that the potential is in general non-zero,
344 * since the soft-cored r will be non-zero.
350 if (sc_r_power
== 6.0)
352 rpm2
= rsq
*rsq
; /* r4 */
353 rp
= rpm2
*rsq
; /* r6 */
355 else if (sc_r_power
== 48.0)
357 rp
= rsq
*rsq
*rsq
; /* r6 */
358 rp
= rp
*rp
; /* r12 */
359 rp
= rp
*rp
; /* r24 */
360 rp
= rp
*rp
; /* r48 */
361 rpm2
= rp
/rsq
; /* r46 */
365 rp
= pow(r
, sc_r_power
); /* not currently supported as input, but can handle it */
371 qq
[STATE_A
] = iqA
*chargeA
[jnr
];
372 qq
[STATE_B
] = iqB
*chargeB
[jnr
];
374 tj
[STATE_A
] = ntiA
+2*typeA
[jnr
];
375 tj
[STATE_B
] = ntiB
+2*typeB
[jnr
];
377 if (ivdw
== GMX_NBKERNEL_VDW_LJEWALD
)
379 c6grid
[STATE_A
] = nbfp_grid
[tj
[STATE_A
]];
380 c6grid
[STATE_B
] = nbfp_grid
[tj
[STATE_B
]];
383 if (nlist
->excl_fep
== NULL
|| nlist
->excl_fep
[k
])
385 c6
[STATE_A
] = nbfp
[tj
[STATE_A
]];
386 c6
[STATE_B
] = nbfp
[tj
[STATE_B
]];
388 for (i
= 0; i
< NSTATES
; i
++)
390 c12
[i
] = nbfp
[tj
[i
]+1];
391 if ((c6
[i
] > 0) && (c12
[i
] > 0))
393 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
394 sigma6
[i
] = 0.5*c12
[i
]/c6
[i
];
395 sigma2
[i
] = pow(sigma6
[i
], 1.0/3.0);
396 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
397 what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
398 if (sigma6
[i
] < sigma6_min
) /* for disappearing coul and vdw with soft core at the same time */
400 sigma6
[i
] = sigma6_min
;
401 sigma2
[i
] = sigma2_min
;
406 sigma6
[i
] = sigma6_def
;
407 sigma2
[i
] = sigma2_def
;
409 if (sc_r_power
== 6.0)
411 sigma_pow
[i
] = sigma6
[i
];
412 sigma_powm2
[i
] = sigma6
[i
]/sigma2
[i
];
414 else if (sc_r_power
== 48.0)
416 sigma_pow
[i
] = sigma6
[i
]*sigma6
[i
]; /* sigma^12 */
417 sigma_pow
[i
] = sigma_pow
[i
]*sigma_pow
[i
]; /* sigma^24 */
418 sigma_pow
[i
] = sigma_pow
[i
]*sigma_pow
[i
]; /* sigma^48 */
419 sigma_powm2
[i
] = sigma_pow
[i
]/sigma2
[i
];
422 { /* not really supported as input, but in here for testing the general case*/
423 sigma_pow
[i
] = pow(sigma2
[i
], sc_r_power
/2);
424 sigma_powm2
[i
] = sigma_pow
[i
]/(sigma2
[i
]);
428 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
429 if ((c12
[STATE_A
] > 0) && (c12
[STATE_B
] > 0))
436 alpha_vdw_eff
= alpha_vdw
;
437 alpha_coul_eff
= alpha_coul
;
440 for (i
= 0; i
< NSTATES
; i
++)
447 /* Only spend time on A or B state if it is non-zero */
448 if ( (qq
[i
] != 0) || (c6
[i
] != 0) || (c12
[i
] != 0) )
450 /* this section has to be inside the loop because of the dependence on sigma_pow */
451 rpinvC
= 1.0/(alpha_coul_eff
*lfac_coul
[i
]*sigma_pow
[i
]+rp
);
452 rinvC
= pow(rpinvC
, 1.0/sc_r_power
);
455 rpinvV
= 1.0/(alpha_vdw_eff
*lfac_vdw
[i
]*sigma_pow
[i
]+rp
);
456 rinvV
= pow(rpinvV
, 1.0/sc_r_power
);
465 n1C
= tab_elemsize
*n0
;
471 n1V
= tab_elemsize
*n0
;
474 /* With Ewald and soft-core we should put the cut-off on r,
475 * not on the soft-cored rC, as the real-space and
476 * reciprocal space contributions should (almost) cancel.
479 !(bExactElecCutoff
&&
480 ((!bEwald
&& rC
>= rcoulomb
) ||
481 (bEwald
&& r
>= rcoulomb
))))
485 case GMX_NBKERNEL_ELEC_COULOMB
:
487 Vcoul
[i
] = qq
[i
]*rinvC
;
488 FscalC
[i
] = Vcoul
[i
];
491 case GMX_NBKERNEL_ELEC_EWALD
:
492 /* Ewald FEP is done only on the 1/r part */
493 Vcoul
[i
] = qq
[i
]*(rinvC
- sh_ewald
);
494 FscalC
[i
] = Vcoul
[i
];
497 case GMX_NBKERNEL_ELEC_REACTIONFIELD
:
499 Vcoul
[i
] = qq
[i
]*(rinvC
+ krf
*rC
*rC
-crf
);
500 FscalC
[i
] = qq
[i
]*(rinvC
- 2.0*krf
*rC
*rC
);
503 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE
:
504 /* non-Ewald tabulated coulomb */
508 Geps
= epsC
*VFtab
[nnn
+2];
509 Heps2
= eps2C
*VFtab
[nnn
+3];
512 FF
= Fp
+Geps
+2.0*Heps2
;
514 FscalC
[i
] = -qq
[i
]*tabscale
*FF
*rC
;
517 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN
:
518 gmx_fatal(FARGS
, "Free energy and GB not implemented.\n");
521 case GMX_NBKERNEL_ELEC_NONE
:
527 gmx_incons("Invalid icoul in free energy kernel");
531 if (fr
->coulomb_modifier
== eintmodPOTSWITCH
)
534 d
= (d
> 0.0) ? d
: 0.0;
536 sw
= 1.0+d2
*d
*(swV3
+d
*(swV4
+d
*swV5
));
537 dsw
= d2
*(swF2
+d
*(swF3
+d
*swF4
));
540 FscalC
[i
] = FscalC
[i
]*sw
+ Vcoul
[i
]*dsw
;
544 if ((c6
[i
] != 0 || c12
[i
] != 0) &&
546 ((ivdw
!= GMX_NBKERNEL_VDW_LJEWALD
&& rV
>= rvdw
) ||
547 (ivdw
== GMX_NBKERNEL_VDW_LJEWALD
&& r
>= rvdw
))))
551 case GMX_NBKERNEL_VDW_LENNARDJONES
:
552 case GMX_NBKERNEL_VDW_LJEWALD
:
554 if (sc_r_power
== 6.0)
560 rinv6
= pow(rinvV
, 6.0);
563 Vvdw12
= c12
[i
]*rinv6
*rinv6
;
564 if (fr
->vdw_modifier
== eintmodPOTSHIFT
)
566 Vvdw
[i
] = ( (Vvdw12
-c12
[i
]*sh_invrc6
*sh_invrc6
)*(1.0/12.0)
567 -(Vvdw6
-c6
[i
]*sh_invrc6
)*(1.0/6.0));
571 Vvdw
[i
] = Vvdw12
*(1.0/12.0) - Vvdw6
*(1.0/6.0);
573 FscalV
[i
] = Vvdw12
- Vvdw6
;
576 case GMX_NBKERNEL_VDW_BUCKINGHAM
:
577 gmx_fatal(FARGS
, "Buckingham free energy not supported.");
580 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE
:
586 Geps
= epsV
*VFtab
[nnn
+2];
587 Heps2
= eps2V
*VFtab
[nnn
+3];
590 FF
= Fp
+Geps
+2.0*Heps2
;
592 FscalV
[i
] -= c6
[i
]*tabscale
*FF
*rV
;
597 Geps
= epsV
*VFtab
[nnn
+6];
598 Heps2
= eps2V
*VFtab
[nnn
+7];
601 FF
= Fp
+Geps
+2.0*Heps2
;
602 Vvdw
[i
] += c12
[i
]*VV
;
603 FscalV
[i
] -= c12
[i
]*tabscale
*FF
*rV
;
606 case GMX_NBKERNEL_VDW_NONE
:
612 gmx_incons("Invalid ivdw in free energy kernel");
616 if (fr
->vdw_modifier
== eintmodPOTSWITCH
)
619 d
= (d
> 0.0) ? d
: 0.0;
621 sw
= 1.0+d2
*d
*(swV3
+d
*(swV4
+d
*swV5
));
622 dsw
= d2
*(swF2
+d
*(swF3
+d
*swF4
));
625 FscalV
[i
] = FscalV
[i
]*sw
+ Vvdw
[i
]*dsw
;
627 FscalV
[i
] = (rV
< rvdw
) ? FscalV
[i
] : 0.0;
628 Vvdw
[i
] = (rV
< rvdw
) ? Vvdw
[i
] : 0.0;
632 /* FscalC (and FscalV) now contain: dV/drC * rC
633 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
634 * Further down we first multiply by r^p-2 and then by
635 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
642 /* Assemble A and B states */
643 for (i
= 0; i
< NSTATES
; i
++)
645 vctot
+= LFC
[i
]*Vcoul
[i
];
646 vvtot
+= LFV
[i
]*Vvdw
[i
];
648 Fscal
+= LFC
[i
]*FscalC
[i
]*rpm2
;
649 Fscal
+= LFV
[i
]*FscalV
[i
]*rpm2
;
651 dvdl_coul
+= Vcoul
[i
]*DLF
[i
] + LFC
[i
]*alpha_coul_eff
*dlfac_coul
[i
]*FscalC
[i
]*sigma_pow
[i
];
652 dvdl_vdw
+= Vvdw
[i
]*DLF
[i
] + LFV
[i
]*alpha_vdw_eff
*dlfac_vdw
[i
]*FscalV
[i
]*sigma_pow
[i
];
655 else if (icoul
== GMX_NBKERNEL_ELEC_REACTIONFIELD
)
657 /* For excluded pairs, which are only in this pair list when
658 * using the Verlet scheme, we don't use soft-core.
659 * The group scheme also doesn't soft-core for these.
660 * As there is no singularity, there is no need for soft-core.
670 for (i
= 0; i
< NSTATES
; i
++)
672 vctot
+= LFC
[i
]*qq
[i
]*VV
;
673 Fscal
+= LFC
[i
]*qq
[i
]*FF
;
674 dvdl_coul
+= DLF
[i
]*qq
[i
]*VV
;
678 if (icoul
== GMX_NBKERNEL_ELEC_EWALD
&&
679 !(bExactElecCutoff
&& r
>= rcoulomb
))
681 /* Because we compute the soft-core normally,
682 * we have to remove the Ewald short range portion.
683 * Done outside of the states loop because this part
684 * doesn't depend on the scaled R.
689 rs
= rsq
*rinv
*tab_ewald_scale
;
692 f_lr
= (1 - frac
)*tab_ewald_F
[ri
] + frac
*tab_ewald_F
[ri
+1];
694 VV
= tab_ewald_V
[ri
] - tab_ewald_halfsp
*frac
*(tab_ewald_F
[ri
] + f_lr
);
701 for (i
= 0; i
< NSTATES
; i
++)
703 vctot
-= LFC
[i
]*qq
[i
]*VV
;
704 Fscal
-= LFC
[i
]*qq
[i
]*FF
;
705 dvdl_coul
-= (DLF
[i
]*qq
[i
])*VV
;
709 if (ivdw
== GMX_NBKERNEL_VDW_LJEWALD
&&
710 !(bExactVdwCutoff
&& r
>= rvdw
))
715 rs
= rsq
*rinv
*tab_ewald_scale
;
718 f_lr
= (1 - frac
)*tab_ewald_F_lj
[ri
] + frac
*tab_ewald_F_lj
[ri
+1];
720 VV
= tab_ewald_V_lj
[ri
] - tab_ewald_halfsp
*frac
*(tab_ewald_F_lj
[ri
] + f_lr
);
721 for (i
= 0; i
< NSTATES
; i
++)
723 vvtot
+= LFV
[i
]*c6grid
[i
]*VV
*(1.0/6.0);
724 Fscal
+= LFV
[i
]*c6grid
[i
]*FF
*(1.0/6.0);
725 dvdl_vdw
+= (DLF
[i
]*c6grid
[i
])*VV
*(1.0/6.0);
738 /* OpenMP atomics are expensive, but this kernels is also
739 * expensive, so we can take this hit, instead of using
740 * thread-local output buffers and extra reduction.
751 /* The atomics below are expensive with many OpenMP threads.
752 * Here unperturbed i-particles will usually only have a few
753 * (perturbed) j-particles in the list. Thus with a buffered list
754 * we can skip a significant number of i-reductions with a check.
756 if (npair_within_cutoff
> 0)
772 fshift
[is3
+1] += fiy
;
774 fshift
[is3
+2] += fiz
;
788 dvdl
[efptCOUL
] += dvdl_coul
;
790 dvdl
[efptVDW
] += dvdl_vdw
;
792 /* Estimate flops, average for free energy stuff:
793 * 12 flops per outer iteration
794 * 150 flops per inner iteration
796 inc_nrnb(nrnb
, eNR_NBKERNEL_FREE_ENERGY
, nlist
->nri
*12 + nlist
->jindex
[n
]*150);
800 nb_free_energy_evaluate_single(real r2
, real sc_r_power
, real alpha_coul
, real alpha_vdw
,
801 real tabscale
, real
*vftab
,
802 real qqA
, real c6A
, real c12A
, real qqB
, real c6B
, real c12B
,
803 real LFC
[2], real LFV
[2], real DLF
[2],
804 real lfac_coul
[2], real lfac_vdw
[2], real dlfac_coul
[2], real dlfac_vdw
[2],
805 real sigma6_def
, real sigma6_min
, real sigma2_def
, real sigma2_min
,
806 real
*velectot
, real
*vvdwtot
, real
*dvdl
)
808 real r
, rp
, rpm2
, rtab
, eps
, eps2
, Y
, F
, Geps
, Heps2
, Fp
, VV
, FF
, fscal
;
809 real qq
[2], c6
[2], c12
[2], sigma6
[2], sigma2
[2], sigma_pow
[2], sigma_powm2
[2];
810 real alpha_coul_eff
, alpha_vdw_eff
, dvdl_coul
, dvdl_vdw
;
811 real rpinv
, r_coul
, r_vdw
, velecsum
, vvdwsum
;
812 real fscal_vdw
[2], fscal_elec
[2];
813 real velec
[2], vvdw
[2];
823 if (sc_r_power
== 6.0)
825 rpm2
= r2
*r2
; /* r4 */
826 rp
= rpm2
*r2
; /* r6 */
828 else if (sc_r_power
== 48.0)
830 rp
= r2
*r2
*r2
; /* r6 */
831 rp
= rp
*rp
; /* r12 */
832 rp
= rp
*rp
; /* r24 */
833 rp
= rp
*rp
; /* r48 */
834 rpm2
= rp
/r2
; /* r46 */
838 rp
= pow(r2
, 0.5*sc_r_power
); /* not currently supported as input, but can handle it */
842 /* Loop over state A(0) and B(1) */
843 for (i
= 0; i
< 2; i
++)
845 if ((c6
[i
] > 0) && (c12
[i
] > 0))
847 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
848 * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
850 sigma6
[i
] = 0.5*c12
[i
]/c6
[i
];
851 sigma2
[i
] = pow(0.5*c12
[i
]/c6
[i
], 1.0/3.0);
852 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
853 what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
854 if (sigma6
[i
] < sigma6_min
) /* for disappearing coul and vdw with soft core at the same time */
856 sigma6
[i
] = sigma6_min
;
857 sigma2
[i
] = sigma2_min
;
862 sigma6
[i
] = sigma6_def
;
863 sigma2
[i
] = sigma2_def
;
865 if (sc_r_power
== 6.0)
867 sigma_pow
[i
] = sigma6
[i
];
868 sigma_powm2
[i
] = sigma6
[i
]/sigma2
[i
];
870 else if (sc_r_power
== 48.0)
872 sigma_pow
[i
] = sigma6
[i
]*sigma6
[i
]; /* sigma^12 */
873 sigma_pow
[i
] = sigma_pow
[i
]*sigma_pow
[i
]; /* sigma^24 */
874 sigma_pow
[i
] = sigma_pow
[i
]*sigma_pow
[i
]; /* sigma^48 */
875 sigma_powm2
[i
] = sigma_pow
[i
]/sigma2
[i
];
878 { /* not really supported as input, but in here for testing the general case*/
879 sigma_pow
[i
] = pow(sigma2
[i
], sc_r_power
/2);
880 sigma_powm2
[i
] = sigma_pow
[i
]/(sigma2
[i
]);
884 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
885 if ((c12
[0] > 0) && (c12
[1] > 0))
892 alpha_vdw_eff
= alpha_vdw
;
893 alpha_coul_eff
= alpha_coul
;
896 /* Loop over A and B states again */
897 for (i
= 0; i
< 2; i
++)
904 /* Only spend time on A or B state if it is non-zero */
905 if ( (qq
[i
] != 0) || (c6
[i
] != 0) || (c12
[i
] != 0) )
908 rpinv
= 1.0/(alpha_coul_eff
*lfac_coul
[i
]*sigma_pow
[i
]+rp
);
909 r_coul
= pow(rpinv
, -1.0/sc_r_power
);
911 /* Electrostatics table lookup data */
912 rtab
= r_coul
*tabscale
;
920 Geps
= eps
*vftab
[ntab
+2];
921 Heps2
= eps2
*vftab
[ntab
+3];
924 FF
= Fp
+Geps
+2.0*Heps2
;
926 fscal_elec
[i
] = -qq
[i
]*FF
*r_coul
*rpinv
*tabscale
;
929 rpinv
= 1.0/(alpha_vdw_eff
*lfac_vdw
[i
]*sigma_pow
[i
]+rp
);
930 r_vdw
= pow(rpinv
, -1.0/sc_r_power
);
931 /* Vdw table lookup data */
932 rtab
= r_vdw
*tabscale
;
940 Geps
= eps
*vftab
[ntab
+6];
941 Heps2
= eps2
*vftab
[ntab
+7];
944 FF
= Fp
+Geps
+2.0*Heps2
;
946 fscal_vdw
[i
] = -c6
[i
]*FF
;
951 Geps
= eps
*vftab
[ntab
+10];
952 Heps2
= eps2
*vftab
[ntab
+11];
955 FF
= Fp
+Geps
+2.0*Heps2
;
956 vvdw
[i
] += c12
[i
]*VV
;
957 fscal_vdw
[i
] -= c12
[i
]*FF
;
958 fscal_vdw
[i
] *= r_vdw
*rpinv
*tabscale
;
961 /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
962 /* Assemble A and B states */
968 for (i
= 0; i
< 2; i
++)
970 velecsum
+= LFC
[i
]*velec
[i
];
971 vvdwsum
+= LFV
[i
]*vvdw
[i
];
973 fscal
+= (LFC
[i
]*fscal_elec
[i
]+LFV
[i
]*fscal_vdw
[i
])*rpm2
;
975 dvdl_coul
+= velec
[i
]*DLF
[i
] + LFC
[i
]*alpha_coul_eff
*dlfac_coul
[i
]*fscal_elec
[i
]*sigma_pow
[i
];
976 dvdl_vdw
+= vvdw
[i
]*DLF
[i
] + LFV
[i
]*alpha_vdw_eff
*dlfac_vdw
[i
]*fscal_vdw
[i
]*sigma_pow
[i
];
979 dvdl
[efptCOUL
] += dvdl_coul
;
980 dvdl
[efptVDW
] += dvdl_vdw
;
982 *velectot
= velecsum
;