/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"

#include "listed_internal.h"
#include "utilities.h"

namespace
{

using gmx::ArrayRef;

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP. */
bool isPairInteraction(int ftype)
{
    return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
}

/*! \brief Zero thread-local output buffers */
void zero_thread_output(f_thread_t* f_t)
{
    constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
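    /* f_t->f holds rvec4 entries, so nelem_fa is the number of real components
     * per force entry (4 here). Only the reduction blocks this thread actually
     * touched (the nblock_used entries of block_index) are cleared, so the
     * clearing cost scales with this thread's bonded work rather than with the
     * total number of atoms. */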

    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i] * reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < SHIFTS; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < egNR; i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.ener[i][j] = 0;
        }
    }
    for (int i = 0; i < efptNR; i++)
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The maximum thread count is arbitrary; we use a fixed number
 * to avoid memory management. Using more than 16 threads is probably
 * never useful performance-wise. */
#define MAX_BONDED_THREADS 256

/*! \brief Reduce thread-local force buffers */
void reduce_thread_forces(int n, gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
    }

    rvec* gmx_restrict f = as_rvec_array(force.data());

    /* This reduction can run on any number of threads,
     * independently of bt->nthreads.
     * But if nthreads matches bt->nthreads (which it currently does)
     * the uniform distribution of the touched blocks over nthreads will
     * match the distribution of bonded over threads well in most cases,
     * which means that threads mostly reduce their own data which increases
     * the number of cache hits.
     */
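    /* Sketch of the reduction scheme: the force buffer is divided into blocks of
     * reduction_block_size atoms. For each block, bt->mask[block] is a bitmask
     * with one bit per thread that wrote to that block, so the loop below only
     * reads the thread-local buffers that actually contributed. The fixed-size
     * fp[] array is why MAX_BONDED_THREADS is a hard limit here. */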
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4* fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 = ind * reduction_block_size;
                int a1 = (ind + 1) * reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, n);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void reduce_thread_output(int                        n,
                          gmx::ForceWithShiftForces* forceWithShiftForces,
                          real*                      ener,
                          gmx_grppairener_t*         grpp,
                          real*                      dvdl,
                          const bonded_threading_t*  bt,
                          const gmx::StepWorkload&   stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energy and virial using one thread only */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
    {
        gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;

        if (stepWork.computeVirial)
        {
            for (int i = 0; i < SHIFTS; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < egNR; i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[i] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}

/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real calc_one_bond(int                           thread,
                   int                           ftype,
                   const InteractionDefinitions& idef,
                   ArrayRef<const int>           iatoms,
                   const int                     numNonperturbedInteractions,
                   const WorkDivision&           workDivision,
                   const rvec                    x[],
                   rvec4                         f[],
                   rvec                          fshift[],
                   const t_forcerec*             fr,
                   const t_pbc*                  pbc,
                   gmx_grppairener_t*            grpp,
                   t_nrnb*                       nrnb,
                   const real*                   lambda,
                   real*                         dvdl,
                   const t_mdatoms*              md,
                   t_fcdata*                     fcd,
                   const gmx::StepWorkload&      stepWork,
                   int*                          global_atom_index)
{
    GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");

    const bool havePerturbedInteractions =
            (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
    BondedKernelFlavor flavor =
            selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
    int efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = efptRESTRAINT;
    }
    else
    {
        efptFTYPE = efptBONDED;
    }

    const int nat1   = interaction_function[ftype].nratoms + 1;
    const int nbonds = iatoms.ssize() / nat1;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
               "The thread division should match the topology");

    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
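
    /* The iatoms list for each ftype is a flat sequence of (parameter-type index +
     * atom indices) records of nat1 entries each. workDivision stores cumulative
     * boundaries into that list, so thread t handles the half-open range
     * [bound(ftype, t), bound(ftype, t + 1)); nb0 and nbn are the offset and length
     * of this thread's slice. */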

    ArrayRef<const t_iparams> iparams = idef.iparams;

    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO The execution time for CMAP dihedrals might be
               nice to account to its own subtimer, but first
               wallcycle needs to be extended to support calling from
               multiple threads. */
            v = cmap_dihs(nbn, iatoms.data() + nb0, iparams.data(), &idef.cmap_grid, x, f, fshift,
                          pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift,
                                    pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
                                    global_atom_index, flavor);
        }
    }
    else
    {
        /* TODO The execution time for pairs might be nice to account
           to its own subtimer, but first wallcycle needs to be
           extended to support calling from multiple threads. */
        do_pairs(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift, pbc, lambda, dvdl,
                 md, fr, havePerturbedInteractions, stepWork, grpp, global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void calcBondedForces(const InteractionDefinitions& idef,
                             const rvec                    x[],
                             const t_forcerec*             fr,
                             const t_pbc*                  pbc_null,
                             rvec*                         fshiftMasterBuffer,
                             gmx_enerdata_t*               enerd,
                             t_nrnb*                       nrnb,
                             const real*                   lambda,
                             real*                         dvdl,
                             const t_mdatoms*              md,
                             t_fcdata*                     fcd,
                             const gmx::StepWorkload&      stepWork,
                             int*                          global_atom_index)
{
    bonded_threading_t* bt = fr->bondedThreading;

#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t& threadBuffers = *bt->f_t[thread];
            int         ftype;
            real*       epot, v;
            /* thread stuff */
            rvec*              fshift;
            real*              dvdlt;
            gmx_grppairener_t* grpp;

            zero_thread_output(&threadBuffers);

            rvec4* ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
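            /* A consequence of this choice: the energy, shift-force and dH/dl
             * reductions in reduce_thread_output() only loop over threads
             * 1..nthreads-1, since thread 0 accumulates into the main buffers
             * directly. The per-atom forces always go through the thread buffers
             * and the block-wise reduction. */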
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = threadBuffers.fshift;
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                const InteractionList& ilist = idef.il[ftype];
                if (!ilist.empty() && ftype_is_bonded_potential(ftype))
                {
                    ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
                    v = calc_one_bond(thread, ftype, idef, iatoms, idef.numNonperturbedInteractions[ftype],
                                      fr->bondedThreading->workDivision, x, ft, fshift, fr, pbc_null,
                                      grpp, nrnb, lambda, dvdlt, md, fcd, stepWork, global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

bool haveRestraints(const InteractionDefinitions& idef, const t_fcdata& fcd)
{
    return (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty() || fcd.orires.nr > 0
            || fcd.disres.nres > 0);
}

bool haveCpuBondeds(const t_forcerec& fr)
{
    return fr.bondedThreading->haveBondeds;
}

bool haveCpuListedForces(const t_forcerec& fr, const InteractionDefinitions& idef, const t_fcdata& fcd)
{
    return haveCpuBondeds(fr) || haveRestraints(idef, fcd);
}

namespace
{

/*! \brief Calculates all listed force interactions. */
void calc_listed(struct gmx_wallcycle*         wcycle,
                 const InteractionDefinitions& idef,
                 const rvec                    x[],
                 gmx::ForceOutputs*            forceOutputs,
                 const t_forcerec*             fr,
                 const t_pbc*                  pbc,
                 gmx_enerdata_t*               enerd,
                 t_nrnb*                       nrnb,
                 const real*                   lambda,
                 const t_mdatoms*              md,
                 t_fcdata*                     fcd,
                 int*                          global_atom_index,
                 const gmx::StepWorkload&      stepWork)
{
    bonded_threading_t* bt = fr->bondedThreading;

    if (haveCpuBondeds(*fr))
    {
        gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, ewcsLISTED);
        /* The dummy array is to have a place to store the dhdl at other values
           of lambda, which will be thrown away in the end */
        real dvdl[efptNR] = { 0 };
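        /* dvdl accumulates the per-coupling-type dH/dlambda contributions from the
         * bonded kernels; when dH/dl is requested this step, it is added to
         * enerd->dvdl_nonlin after the thread reduction below, otherwise it is
         * discarded as the comment above says. */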
        calcBondedForces(idef, x, fr, fr->bMolPBC ? pbc : nullptr,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb,
                         lambda, dvdl, md, fcd, stepWork, global_atom_index);
        wallcycle_sub_stop(wcycle, ewcsLISTED);

        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
        reduce_thread_output(fr->natoms_force, &forceWithShiftForces, enerd->term, &enerd->grpp,
                             dvdl, bt, stepWork);

        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
    }
}

/*! \brief As calc_listed(), but only determines the potential energy
 *         for the perturbed interactions.
 *
 * The shift forces in fr are not affected.
 */
void calc_listed_lambda(const InteractionDefinitions& idef,
                        const rvec                    x[],
                        const t_forcerec*             fr,
                        const struct t_pbc*           pbc,
                        gmx_grppairener_t*            grpp,
                        real*                         epot,
                        gmx::ArrayRef<real>           dvdl,
                        t_nrnb*                       nrnb,
                        const real*                   lambda,
                        const t_mdatoms*              md,
                        t_fcdata*                     fcd,
                        int*                          global_atom_index)
{
    real          v;
    rvec4*        f;
    rvec*         fshift;
    const t_pbc*  pbc_null;
    WorkDivision& workDivision = fr->bondedThreading->foreignLambdaWorkDivision;

    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* We already have the forces, so we use temp buffers here */
    // TODO: Get rid of these allocations by using permanent force buffers
    snew(f, fr->natoms_force);
    snew(fshift, SHIFTS);

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const InteractionList& ilist = idef.il[ftype];
            /* Create a temporary iatom list with only perturbed interactions */
            const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
            ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
                    ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
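            /* This relies on the FE-sorted layout asserted in calc_one_bond(): with
             * ilsort == ilsortFE_SORTED, all non-perturbed interactions come first
             * in ilist.iatoms, so the tail starting at numNonperturbed contains
             * exactly the perturbed interactions. */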
            if (!iatomsPerturbed.empty())
            {
                /* Set the work range of thread 0 to the perturbed bondeds */
                workDivision.setBound(ftype, 0, 0);
                workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());

                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                v = calc_one_bond(0, ftype, idef, iatomsPerturbed, iatomsPerturbed.ssize(),
                                  workDivision, x, f, fshift, fr, pbc_null, grpp, nrnb, lambda,
                                  dvdl.data(), md, fcd, tempFlags, global_atom_index);
                epot[ftype] += v;
            }
        }
    }

    sfree(fshift);
    sfree(f);
}

} // namespace

void do_force_listed(struct gmx_wallcycle*          wcycle,
                     const matrix                   box,
                     const t_lambda*                fepvals,
                     const t_commrec*               cr,
                     const gmx_multisim_t*          ms,
                     const InteractionDefinitions&  idef,
                     const rvec                     x[],
                     gmx::ArrayRef<const gmx::RVec> xWholeMolecules,
                     history_t*                     hist,
                     gmx::ForceOutputs*             forceOutputs,
                     const t_forcerec*              fr,
                     const struct t_pbc*            pbc,
                     gmx_enerdata_t*                enerd,
                     t_nrnb*                        nrnb,
                     const real*                    lambda,
                     const t_mdatoms*               md,
                     t_fcdata*                      fcd,
                     int*                           global_atom_index,
                     const gmx::StepWorkload&       stepWork)
{
    if (!stepWork.computeListedForces)
    {
        return;
    }

    t_pbc pbc_full; /* Full PBC is needed for position restraints */
    if (haveRestraints(idef, *fcd))
    {
        if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
        {
            /* Not enough flops to bother counting */
            set_pbc(&pbc_full, fr->pbcType, box);
        }

        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);

        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
        }

        if (!idef.il[F_FBPOSRES].empty())
        {
            fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
        }

        /* Do pre force calculation stuff which might require communication */
        if (fcd->orires.nr > 0)
        {
            GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
            enerd->term[F_ORIRESDEV] = calc_orires_dev(
                    ms, idef.il[F_ORIRES].size(), idef.il[F_ORIRES].iatoms.data(), idef.iparams.data(),
                    md, xWholeMolecules, x, fr->bMolPBC ? pbc : nullptr, fcd, hist);
        }
        if (fcd->disres.nres > 0)
        {
            calc_disres_R_6(cr, ms, idef.il[F_DISRES].size(), idef.il[F_DISRES].iatoms.data(), x,
                            fr->bMolPBC ? pbc : nullptr, fcd, hist);
        }

        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
    }

    calc_listed(wcycle, idef, x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcd,
                global_atom_index, stepWork);

    /* Check if we have to determine energy differences
     * at foreign lambdas.
     */
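    /* For each foreign lambda point we re-evaluate only the perturbed listed
     * interactions (via calc_listed_lambda() below) into enerd->foreign_term and
     * enerd->foreign_grpp, then accumulate the total into enerd->enerpart_lambda[i]
     * and the summed dvdl into enerd->dhdlLambda[i]. */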
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        real dvdl[efptNR] = { 0 };
        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
        }
        if (idef.ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
            if (idef.ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (size_t i = 0; i < enerd->enerpart_lambda.size(); i++)
            {
                real lam_i[efptNR];

                reset_foreign_enerdata(enerd);
                for (int j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
                }
                calc_listed_lambda(idef, x, fr, pbc, &(enerd->foreign_grpp), enerd->foreign_term,
                                   dvdl, nrnb, lam_i, md, fcd, global_atom_index);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
                for (int j = 0; j < efptNR; j++)
                {
                    enerd->dhdlLambda[i] += dvdl[j];
                    dvdl[j] = 0;
                }
            }
            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);