Improve workload data structures' docs
[gromacs.git] / src / gromacs / listed_forces / pairs.cpp
blobcede2598d1ebaca96a24dd27a045813142071411
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
37 * \brief This file defines functions for "pair" interactions
38 * (i.e. listed non-bonded interactions, e.g. 1-4 interactions)
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed_forces
44 #include "gmxpre.h"
46 #include "pairs.h"
48 #include <cmath>
50 #include "gromacs/listed_forces/bonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/group.h"
54 #include "gromacs/mdtypes/md_enums.h"
55 #include "gromacs/mdtypes/nblist.h"
56 #include "gromacs/mdtypes/simulation_workload.h"
57 #include "gromacs/pbcutil/ishift.h"
58 #include "gromacs/pbcutil/mshift.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/pbcutil/pbc_simd.h"
61 #include "gromacs/simd/simd.h"
62 #include "gromacs/simd/simd_math.h"
63 #include "gromacs/simd/vector_operations.h"
64 #include "gromacs/tables/forcetable.h"
65 #include "gromacs/utility/basedefinitions.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/gmxassert.h"
69 #include "listed_internal.h"
71 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
73 /*! \brief Issue a warning if a listed interaction is beyond a table limit */
74 static void
75 warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
77 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
78 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
79 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
80 "a smaller molecule you are decoupling during a free energy calculation.\n"
81 "Since interactions at distances beyond the table cannot be computed,\n"
82 "they are skipped until they are inside the table limit again. You will\n"
83 "only see this message once, even if it occurs for several interactions.\n\n"
84 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
85 "probably something wrong with your system. Only change the table-extension\n"
86 "distance in the mdp file if you are really sure that is the reason.\n",
87 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
89 if (debug)
91 fprintf(debug,
92 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
93 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
94 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
98 /*! \brief Compute the energy and force for a single pair interaction */
99 static real
100 evaluate_single(real r2, real tabscale, const real *vftab, real tableStride,
101 real qq, real c6, real c12, real *velec, real *vvdw)
103 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
104 int ntab;
106 /* Do the tabulated interactions - first table lookup */
107 rinv = gmx::invsqrt(r2);
108 r = r2*rinv;
109 rtab = r*tabscale;
110 ntab = static_cast<int>(rtab);
111 eps = rtab-ntab;
112 eps2 = eps*eps;
113 ntab = static_cast<int>(tableStride*ntab);
114 /* Electrostatics */
115 Y = vftab[ntab];
116 F = vftab[ntab+1];
117 Geps = eps*vftab[ntab+2];
118 Heps2 = eps2*vftab[ntab+3];
119 Fp = F+Geps+Heps2;
120 VVe = Y+eps*Fp;
121 FFe = Fp+Geps+2.0*Heps2;
122 /* Dispersion */
123 Y = vftab[ntab+4];
124 F = vftab[ntab+5];
125 Geps = eps*vftab[ntab+6];
126 Heps2 = eps2*vftab[ntab+7];
127 Fp = F+Geps+Heps2;
128 VVd = Y+eps*Fp;
129 FFd = Fp+Geps+2.0*Heps2;
130 /* Repulsion */
131 Y = vftab[ntab+8];
132 F = vftab[ntab+9];
133 Geps = eps*vftab[ntab+10];
134 Heps2 = eps2*vftab[ntab+11];
135 Fp = F+Geps+Heps2;
136 VVr = Y+eps*Fp;
137 FFr = Fp+Geps+2.0*Heps2;
139 *velec = qq*VVe;
140 *vvdw = c6*VVd+c12*VVr;
142 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
144 return fscal;
147 /*! \brief Compute the energy and force for a single pair interaction under FEP */
148 static real
149 free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul,
150 real alpha_vdw, real tabscale, const real *vftab, real tableStride,
151 real qqA, real c6A, real c12A, real qqB,
152 real c6B, real c12B, const real LFC[2], const real LFV[2], const real DLF[2],
153 const real lfac_coul[2], const real lfac_vdw[2], const real dlfac_coul[2],
154 const real dlfac_vdw[2], real sigma6_def, real sigma6_min,
155 real sigma2_def, real sigma2_min,
156 real *velectot, real *vvdwtot, real *dvdl)
158 real rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
159 real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2];
160 real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
161 real rpinv, r_coul, r_vdw, velecsum, vvdwsum;
162 real fscal_vdw[2], fscal_elec[2];
163 real velec[2], vvdw[2];
164 int i, ntab;
165 const real half = 0.5;
166 const real minusOne = -1.0;
167 const real one = 1.0;
168 const real two = 2.0;
169 const real six = 6.0;
170 const real fourtyeight = 48.0;
172 qq[0] = qqA;
173 qq[1] = qqB;
174 c6[0] = c6A;
175 c6[1] = c6B;
176 c12[0] = c12A;
177 c12[1] = c12B;
179 if (sc_r_power == six)
181 rpm2 = r2*r2; /* r4 */
182 rp = rpm2*r2; /* r6 */
184 else if (sc_r_power == fourtyeight)
186 rp = r2*r2*r2; /* r6 */
187 rp = rp*rp; /* r12 */
188 rp = rp*rp; /* r24 */
189 rp = rp*rp; /* r48 */
190 rpm2 = rp/r2; /* r46 */
192 else
194 rp = std::pow(r2, half * sc_r_power); /* not currently supported as input, but can handle it */
195 rpm2 = rp/r2;
198 /* Loop over state A(0) and B(1) */
199 for (i = 0; i < 2; i++)
201 if ((c6[i] > 0) && (c12[i] > 0))
203 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
204 * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
206 sigma6[i] = half*c12[i]/c6[i];
207 sigma2[i] = std::cbrt(half*c12[i]/c6[i]);
208 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
209 what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
210 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
212 sigma6[i] = sigma6_min;
213 sigma2[i] = sigma2_min;
216 else
218 sigma6[i] = sigma6_def;
219 sigma2[i] = sigma2_def;
221 if (sc_r_power == six)
223 sigma_pow[i] = sigma6[i];
225 else if (sc_r_power == fourtyeight)
227 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
228 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
229 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
231 else
232 { /* not really supported as input, but in here for testing the general case*/
233 sigma_pow[i] = std::pow(sigma2[i], sc_r_power/2);
237 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
238 if ((c12[0] > 0) && (c12[1] > 0))
240 alpha_vdw_eff = 0;
241 alpha_coul_eff = 0;
243 else
245 alpha_vdw_eff = alpha_vdw;
246 alpha_coul_eff = alpha_coul;
249 /* Loop over A and B states again */
250 for (i = 0; i < 2; i++)
252 fscal_elec[i] = 0;
253 fscal_vdw[i] = 0;
254 velec[i] = 0;
255 vvdw[i] = 0;
257 /* Only spend time on A or B state if it is non-zero */
258 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
260 /* Coulomb */
261 rpinv = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
262 r_coul = std::pow(rpinv, minusOne / sc_r_power);
264 /* Electrostatics table lookup data */
265 rtab = r_coul*tabscale;
266 ntab = static_cast<int>(rtab);
267 eps = rtab-ntab;
268 eps2 = eps*eps;
269 ntab = static_cast<int>(tableStride*ntab);
270 /* Electrostatics */
271 Y = vftab[ntab];
272 F = vftab[ntab+1];
273 Geps = eps*vftab[ntab+2];
274 Heps2 = eps2*vftab[ntab+3];
275 Fp = F+Geps+Heps2;
276 VV = Y+eps*Fp;
277 FF = Fp+Geps+two*Heps2;
278 velec[i] = qq[i]*VV;
279 fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
281 /* Vdw */
282 rpinv = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
283 r_vdw = std::pow(rpinv, minusOne / sc_r_power);
284 /* Vdw table lookup data */
285 rtab = r_vdw*tabscale;
286 ntab = static_cast<int>(rtab);
287 eps = rtab-ntab;
288 eps2 = eps*eps;
289 ntab = 12*ntab;
290 /* Dispersion */
291 Y = vftab[ntab+4];
292 F = vftab[ntab+5];
293 Geps = eps*vftab[ntab+6];
294 Heps2 = eps2*vftab[ntab+7];
295 Fp = F+Geps+Heps2;
296 VV = Y+eps*Fp;
297 FF = Fp+Geps+two*Heps2;
298 vvdw[i] = c6[i]*VV;
299 fscal_vdw[i] = -c6[i]*FF;
301 /* Repulsion */
302 Y = vftab[ntab+8];
303 F = vftab[ntab+9];
304 Geps = eps*vftab[ntab+10];
305 Heps2 = eps2*vftab[ntab+11];
306 Fp = F+Geps+Heps2;
307 VV = Y+eps*Fp;
308 FF = Fp+Geps+two*Heps2;
309 vvdw[i] += c12[i]*VV;
310 fscal_vdw[i] -= c12[i]*FF;
311 fscal_vdw[i] *= r_vdw*rpinv*tabscale;
314 /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
315 /* Assemble A and B states */
316 velecsum = 0;
317 vvdwsum = 0;
318 dvdl_coul = 0;
319 dvdl_vdw = 0;
320 fscal = 0;
321 for (i = 0; i < 2; i++)
323 velecsum += LFC[i]*velec[i];
324 vvdwsum += LFV[i]*vvdw[i];
326 fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
328 dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
329 dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
332 dvdl[efptCOUL] += dvdl_coul;
333 dvdl[efptVDW] += dvdl_vdw;
335 *velectot = velecsum;
336 *vvdwtot = vvdwsum;
338 return fscal;
341 /*! \brief Calculate pair interactions, supports all types and conditions. */
342 template <BondedKernelFlavor flavor>
343 static real
344 do_pairs_general(int ftype, int nbonds,
345 const t_iatom iatoms[], const t_iparams iparams[],
346 const rvec x[], rvec4 f[], rvec fshift[],
347 const struct t_pbc *pbc, const struct t_graph *g,
348 const real *lambda, real *dvdl,
349 const t_mdatoms *md,
350 const t_forcerec *fr, gmx_grppairener_t *grppener,
351 int *global_atom_index)
353 real qq, c6, c12;
354 rvec dx;
355 ivec dt;
356 int i, itype, ai, aj, gid;
357 int fshift_index;
358 real r2;
359 real fscal, velec, vvdw;
360 real * energygrp_elec;
361 real * energygrp_vdw;
362 static gmx_bool warned_rlimit = FALSE;
363 /* Free energy stuff */
364 gmx_bool bFreeEnergy;
365 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
366 real qqB, c6B, c12B, sigma2_def, sigma2_min;
368 switch (ftype)
370 case F_LJ14:
371 case F_LJC14_Q:
372 energygrp_elec = grppener->ener[egCOUL14].data();
373 energygrp_vdw = grppener->ener[egLJ14].data();
374 break;
375 case F_LJC_PAIRS_NB:
376 energygrp_elec = grppener->ener[egCOULSR].data();
377 energygrp_vdw = grppener->ener[egLJSR].data();
378 break;
379 default:
380 energygrp_elec = nullptr; /* Keep compiler happy */
381 energygrp_vdw = nullptr; /* Keep compiler happy */
382 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
385 if (fr->efep != efepNO)
387 /* Lambda factor for state A=1-lambda and B=lambda */
388 LFC[0] = 1.0 - lambda[efptCOUL];
389 LFV[0] = 1.0 - lambda[efptVDW];
390 LFC[1] = lambda[efptCOUL];
391 LFV[1] = lambda[efptVDW];
393 /*derivative of the lambda factor for state A and B */
394 DLF[0] = -1;
395 DLF[1] = 1;
397 /* precalculate */
398 sigma2_def = std::cbrt(fr->sc_sigma6_def);
399 sigma2_min = std::cbrt(fr->sc_sigma6_min);
401 for (i = 0; i < 2; i++)
403 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
404 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
405 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
406 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
409 else
411 sigma2_min = sigma2_def = 0;
414 /* TODO This code depends on the logic in tables.c that constructs
415 the table layout, which should be made explicit in future
416 cleanup. */
417 GMX_ASSERT(etiNR == 3, "Pair-interaction code that uses GROMACS interaction tables supports exactly 3 tables");
418 GMX_ASSERT(fr->pairsTable->interaction == GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
419 "Pair interaction kernels need a table with Coulomb, repulsion and dispersion entries");
421 const real epsfac = fr->ic->epsfac;
423 bFreeEnergy = FALSE;
424 for (i = 0; (i < nbonds); )
426 itype = iatoms[i++];
427 ai = iatoms[i++];
428 aj = iatoms[i++];
429 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
431 /* Get parameters */
432 switch (ftype)
434 case F_LJ14:
435 bFreeEnergy =
436 (fr->efep != efepNO &&
437 (((md->nPerturbed != 0) && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
438 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
439 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
440 qq = md->chargeA[ai]*md->chargeA[aj]*epsfac*fr->fudgeQQ;
441 c6 = iparams[itype].lj14.c6A;
442 c12 = iparams[itype].lj14.c12A;
443 break;
444 case F_LJC14_Q:
445 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*epsfac*iparams[itype].ljc14.fqq;
446 c6 = iparams[itype].ljc14.c6;
447 c12 = iparams[itype].ljc14.c12;
448 break;
449 case F_LJC_PAIRS_NB:
450 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*epsfac;
451 c6 = iparams[itype].ljcnb.c6;
452 c12 = iparams[itype].ljcnb.c12;
453 break;
454 default:
455 /* Cannot happen since we called gmx_fatal() above in this case */
456 qq = c6 = c12 = 0; /* Keep compiler happy */
457 break;
460 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
461 * included in the general nfbp array now. This means the tables are scaled down by the
462 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
463 * be scaled up.
465 c6 *= 6.0;
466 c12 *= 12.0;
468 /* Do we need to apply full periodic boundary conditions? */
469 if (fr->bMolPBC)
471 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
473 else
475 fshift_index = CENTRAL;
476 rvec_sub(x[ai], x[aj], dx);
478 r2 = norm2(dx);
480 if (r2 >= fr->pairsTable->r*fr->pairsTable->r)
482 /* This check isn't race free. But it doesn't matter because if a race occurs the only
483 * disadvantage is that the warning is printed twice */
484 if (!warned_rlimit)
486 warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->pairsTable->r);
487 warned_rlimit = TRUE;
489 continue;
492 if (bFreeEnergy)
494 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
495 qqB = md->chargeB[ai]*md->chargeB[aj]*epsfac*fr->fudgeQQ;
496 c6B = iparams[itype].lj14.c6B*6.0;
497 c12B = iparams[itype].lj14.c12B*12.0;
499 fscal = free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
500 fr->pairsTable->scale, fr->pairsTable->data, fr->pairsTable->stride,
501 qq, c6, c12, qqB, c6B, c12B,
502 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
503 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
505 else
507 /* Evaluate tabulated interaction without free energy */
508 fscal = evaluate_single(r2, fr->pairsTable->scale, fr->pairsTable->data, fr->pairsTable->stride,
509 qq, c6, c12, &velec, &vvdw);
512 energygrp_elec[gid] += velec;
513 energygrp_vdw[gid] += vvdw;
514 svmul(fscal, dx, dx);
516 /* Add the forces */
517 rvec_inc(f[ai], dx);
518 rvec_dec(f[aj], dx);
520 if (computeVirial(flavor))
522 if (g)
524 /* Correct the shift forces using the graph */
525 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
526 fshift_index = IVEC2IS(dt);
528 if (fshift_index != CENTRAL)
530 rvec_inc(fshift[fshift_index], dx);
531 rvec_dec(fshift[CENTRAL], dx);
535 return 0.0;
538 /*! \brief Calculate pairs, only for plain-LJ + plain Coulomb normal type.
540 * This function is templated for real/SimdReal and for optimization.
542 template<typename T, int pack_size,
543 typename pbc_type>
544 static void
545 do_pairs_simple(int nbonds,
546 const t_iatom iatoms[], const t_iparams iparams[],
547 const rvec x[], rvec4 f[],
548 const pbc_type pbc,
549 const t_mdatoms *md,
550 const real scale_factor)
552 const int nfa1 = 1 + 2;
554 T six(6);
555 T twelve(12);
556 T ef(scale_factor);
558 #if GMX_SIMD_HAVE_REAL
559 alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[pack_size];
560 alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[pack_size];
561 alignas(GMX_SIMD_ALIGNMENT) real coeff[3*pack_size];
562 #else
563 std::int32_t ai[pack_size];
564 std::int32_t aj[pack_size];
565 real coeff[3*pack_size];
566 #endif
568 /* nbonds is #pairs*nfa1, here we step pack_size pairs */
569 for (int i = 0; i < nbonds; i += pack_size*nfa1)
571 /* Collect atoms for pack_size pairs.
572 * iu indexes into iatoms, we should not let iu go beyond nbonds.
574 int iu = i;
575 for (int s = 0; s < pack_size; s++)
577 int itype = iatoms[iu];
578 ai[s] = iatoms[iu + 1];
579 aj[s] = iatoms[iu + 2];
581 if (i + s*nfa1 < nbonds)
583 coeff[0*pack_size + s] = iparams[itype].lj14.c6A;
584 coeff[1*pack_size + s] = iparams[itype].lj14.c12A;
585 coeff[2*pack_size + s] = md->chargeA[ai[s]]*md->chargeA[aj[s]];
587 /* Avoid indexing the iatoms array out of bounds.
588 * We pad the coordinate indices with the last atom pair.
590 if (iu + nfa1 < nbonds)
592 iu += nfa1;
595 else
597 /* Pad the coefficient arrays with zeros to get zero forces */
598 coeff[0*pack_size + s] = 0;
599 coeff[1*pack_size + s] = 0;
600 coeff[2*pack_size + s] = 0;
604 /* Load the coordinates */
605 T xi[DIM], xj[DIM];
606 gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]);
607 gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]);
609 T c6 = load<T>(coeff + 0*pack_size);
610 T c12 = load<T>(coeff + 1*pack_size);
611 T qq = load<T>(coeff + 2*pack_size);
613 /* We could save these operations by storing 6*C6,12*C12 */
614 c6 = six*c6;
615 c12 = twelve*c12;
617 T dr[DIM];
618 pbc_dx_aiuc(pbc, xi, xj, dr);
620 T rsq = dr[XX]*dr[XX] + dr[YY]*dr[YY] + dr[ZZ]*dr[ZZ];
621 T rinv = gmx::invsqrt(rsq);
622 T rinv2 = rinv*rinv;
623 T rinv6 = rinv2*rinv2*rinv2;
625 /* Calculate the Coulomb force * r */
626 T cfr = ef*qq*rinv;
628 /* Calculate the LJ force * r and add it to the Coulomb part */
629 T fr = gmx::fma(fms(c12, rinv6, c6), rinv6, cfr);
631 T finvr = fr*rinv2;
632 T fx = finvr*dr[XX];
633 T fy = finvr*dr[YY];
634 T fz = finvr*dr[ZZ];
636 /* Add the pair forces to the force array.
637 * Note that here we might add multiple force components for some atoms
638 * due to the SIMD padding. But the extra force components are zero.
640 transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, fx, fy, fz);
641 transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, fx, fy, fz);
645 /*! \brief Calculate all listed pair interactions */
646 void
647 do_pairs(int ftype, int nbonds,
648 const t_iatom iatoms[], const t_iparams iparams[],
649 const rvec x[], rvec4 f[], rvec fshift[],
650 const struct t_pbc *pbc, const struct t_graph *g,
651 const real *lambda, real *dvdl,
652 const t_mdatoms *md,
653 const t_forcerec *fr,
654 const bool havePerturbedInteractions,
655 const gmx::StepWorkload &stepWork,
656 gmx_grppairener_t *grppener,
657 int *global_atom_index)
659 if (ftype == F_LJ14 &&
660 fr->ic->vdwtype != evdwUSER && !EEL_USER(fr->ic->eeltype) &&
661 !havePerturbedInteractions &&
662 (!stepWork.computeVirial && !stepWork.computeEnergy))
664 /* We use a fast code-path for plain LJ 1-4 without FEP.
666 * TODO: Add support for energies (straightforward) and virial
667 * in the SIMD template. For the virial it's inconvenient to store
668 * the force sums for the shifts and we should directly calculate
669 * and sum the virial for the shifts. But we should do this
670 * at once for the angles and dihedrals as well.
672 #if GMX_SIMD_HAVE_REAL
673 if (fr->use_simd_kernels)
675 alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9*GMX_SIMD_REAL_WIDTH];
676 set_pbc_simd(pbc, pbc_simd);
678 do_pairs_simple<SimdReal, GMX_SIMD_REAL_WIDTH,
679 const real *>(nbonds, iatoms, iparams,
680 x, f, pbc_simd,
681 md, fr->ic->epsfac*fr->fudgeQQ);
683 else
684 #endif
686 /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
687 t_pbc pbc_no;
688 const t_pbc *pbc_nonnull;
690 if (pbc != nullptr)
692 pbc_nonnull = pbc;
694 else
696 set_pbc(&pbc_no, epbcNONE, nullptr);
697 pbc_nonnull = &pbc_no;
700 do_pairs_simple<real, 1,
701 const t_pbc *>(nbonds, iatoms, iparams,
702 x, f, pbc_nonnull,
703 md, fr->ic->epsfac*fr->fudgeQQ);
706 else if (stepWork.computeVirial)
708 do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(
709 ftype, nbonds, iatoms, iparams,
710 x, f, fshift, pbc, g,
711 lambda, dvdl,
712 md, fr, grppener, global_atom_index);
714 else
716 do_pairs_general<BondedKernelFlavor::ForcesAndEnergy>(
717 ftype, nbonds, iatoms, iparams,
718 x, f, fshift, pbc, g,
719 lambda, dvdl,
720 md, fr, grppener, global_atom_index);