Improve workload data structures' docs
[gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
blob 0a094c4bbe486b90252ccf959ccb126facd996f2
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"

#include "listed_internal.h"
#include "utilities.h"
namespace
{

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP. */
bool
isPairInteraction(int ftype)
{
    return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
}
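/* Note: the range check above assumes that all pair-listed interaction
 * types appear contiguously between F_LJ14 and F_LJC_PAIRS_NB in the
 * function-type enumeration. */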
/*! \brief Zero thread-local output buffers */
void
zero_thread_output(f_thread_t *f_t)
{
    constexpr int nelem_fa = sizeof(f_t->f[0])/sizeof(real);

    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i]*reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < SHIFTS; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < egNR; i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.ener[i][j] = 0;
        }
    }
    for (int i = 0; i < efptNR; i++)
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The max thread number is arbitrary, we used a fixed number
 * to avoid memory management. Using more than 16 threads is probably
 * never useful performance wise. */
#define MAX_BONDED_THREADS 256
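/* The fixed upper bound keeps the per-block array of thread force pointers
 * in reduce_thread_forces() on the stack (see fp[] below), at the cost of
 * refusing to run the reduction with more OpenMP threads than this. */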
/*! \brief Reduce thread-local force buffers */
void
reduce_thread_forces(int                       n,
                     gmx::ArrayRef<gmx::RVec>  force,
                     const bonded_threading_t *bt,
                     int                       nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
                  MAX_BONDED_THREADS);
    }

    rvec * gmx_restrict f = as_rvec_array(force.data());

    /* This reduction can run on any number of threads,
     * independently of bt->nthreads.
     * But if nthreads matches bt->nthreads (which it currently does)
     * the uniform distribution of the touched blocks over nthreads will
     * match the distribution of bonded over threads well in most cases,
     * which means that threads mostly reduce their own data which increases
     * the number of cache hits.
     */
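    /* A sketch of the bookkeeping used below (see listed_internal.h for the
     * exact definitions): the force buffer is divided into blocks of
     * reduction_block_size atoms, block_index lists the blocks that any
     * thread touched, and bit ft of mask[ind] is set when thread ft wrote
     * to block ind. If, say, threads 0 and 2 touched block 7, both of their
     * buffers are summed into f, but only for the atoms of block 7. */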
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4 *fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 =  ind     *reduction_block_size;
                int a1 = (ind + 1)*reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, n);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void
reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
                     real *ener, gmx_grppairener_t *grpp, real *dvdl,
                     const bonded_threading_t *bt,
                     const gmx::StepWorkload  &stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energy and virial using one thread only */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) &&
        bt->nthreads > 1)
    {
        gmx::ArrayRef < const std::unique_ptr < f_thread_t>> f_t = bt->f_t;
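        /* The reductions below start at t = 1: thread 0 accumulated its shift
         * forces, energies and dH/dlambda contributions directly into the
         * main output buffers in calcBondedForces(). */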
        if (stepWork.computeVirial)
        {
            for (int i = 0; i < SHIFTS; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < egNR; i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[i] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload &stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}
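/* In practice this means: steps that need the virial get
 * ForcesAndVirialAndEnergy, energy-only steps get ForcesAndEnergy, and
 * plain force steps use SIMD kernels only when SIMD is enabled and no
 * perturbed interactions are present; otherwise they fall back to
 * ForcesNoSimd. */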
/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real
calc_one_bond(int thread,
              int ftype, const t_idef *idef,
              const WorkDivision &workDivision,
              const rvec x[], rvec4 f[], rvec fshift[],
              const t_forcerec *fr,
              const t_pbc *pbc, const t_graph *g,
              gmx_grppairener_t *grpp,
              t_nrnb *nrnb,
              const real *lambda, real *dvdl,
              const t_mdatoms *md, t_fcdata *fcd,
              const gmx::StepWorkload &stepWork,
              int *global_atom_index)
{
    GMX_ASSERT(idef->ilsort == ilsortNO_FE || idef->ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");

    const bool havePerturbedInteractions =
        (idef->ilsort == ilsortFE_SORTED &&
         idef->il[ftype].nr_nonperturbed < idef->il[ftype].nr);
    BondedKernelFlavor flavor =
        selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
    int efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = efptRESTRAINT;
    }
    else
    {
        efptFTYPE = efptBONDED;
    }

    const int      nat1   = interaction_function[ftype].nratoms + 1;
    const int      nbonds = idef->il[ftype].nr/nat1;
    const t_iatom *iatoms = idef->il[ftype].iatoms;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == idef->il[ftype].nr,
               "The thread division should match the topology");

    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
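    /* The work-division bounds are stored as offsets into the iatoms array
     * (multiples of nat1), as the assertion above implies, so nb0 and nbn
     * can be passed straight to the kernels below. */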
    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO The execution time for CMAP dihedrals might be
               nice to account to its own subtimer, but first
               wallcycle needs to be extended to support calling from
               multiple threads. */
            v = cmap_dihs(nbn, iatoms+nb0,
                          idef->iparams, idef->cmap_grid,
                          x, f, fshift,
                          pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
                          md, fcd, global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype, nbn, iatoms + nb0,
                                    idef->iparams,
                                    x, f, fshift,
                                    pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
                                    md, fcd, global_atom_index,
                                    flavor);
        }
    }
    else
    {
        /* TODO The execution time for pairs might be nice to account
           to its own subtimer, but first wallcycle needs to be
           extended to support calling from multiple threads. */
        do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
                 pbc, g, lambda, dvdl, md, fr,
                 havePerturbedInteractions, stepWork,
                 grpp, global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void
calcBondedForces(const t_idef            *idef,
                 const rvec               x[],
                 const t_forcerec        *fr,
                 const t_pbc             *pbc_null,
                 const t_graph           *g,
                 rvec                    *fshiftMasterBuffer,
                 gmx_enerdata_t          *enerd,
                 t_nrnb                  *nrnb,
                 const real              *lambda,
                 real                    *dvdl,
                 const t_mdatoms         *md,
                 t_fcdata                *fcd,
                 const gmx::StepWorkload &stepWork,
                 int                     *global_atom_index)
{
    bonded_threading_t *bt = fr->bondedThreading;

#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t        &threadBuffers = *bt->f_t[thread];
            int                ftype;
            real              *epot, v;
            /* thread stuff */
            rvec              *fshift;
            real              *dvdlt;
            gmx_grppairener_t *grpp;

            zero_thread_output(&threadBuffers);

            rvec4 *ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = threadBuffers.fshift;
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
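            /* At this point each thread has its own complete set of output
             * buffers (ft, fshift, epot, grpp, dvdlt), so the loop over
             * interaction types below needs no synchronization; thread-local
             * results are merged afterwards by reduce_thread_output(). */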
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
                {
                    v = calc_one_bond(thread, ftype, idef,
                                      fr->bondedThreading->workDivision, x,
                                      ft, fshift, fr, pbc_null, g, grpp,
                                      nrnb, lambda, dvdlt,
                                      md, fcd, stepWork,
                                      global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }
}

bool haveRestraints(const t_idef   &idef,
                    const t_fcdata &fcd)
{
    return
        ((idef.il[F_POSRES].nr > 0) ||
         (idef.il[F_FBPOSRES].nr > 0) ||
         fcd.orires.nr > 0 ||
         fcd.disres.nres > 0);
}
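/* Note for readers: the restraint types detected above need extra handling in
 * calc_listed() before the thread-parallel bonded loop runs; position
 * restraints are computed through the *_wrapper calls, while orientation and
 * distance restraints first need their pre-calculation done there, which may
 * involve communication (see the cr/ms arguments). */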
bool haveCpuBondeds(const t_forcerec &fr)
{
    return fr.bondedThreading->haveBondeds;
}

bool haveCpuListedForces(const t_forcerec &fr,
                         const t_idef     &idef,
                         const t_fcdata   &fcd)
{
    return haveCpuBondeds(fr) || haveRestraints(idef, fcd);
}

void calc_listed(const t_commrec *cr,
                 const gmx_multisim_t *ms,
                 struct gmx_wallcycle *wcycle,
                 const t_idef *idef,
                 const rvec x[], history_t *hist,
                 gmx::ForceOutputs *forceOutputs,
                 const t_forcerec *fr,
                 const struct t_pbc *pbc,
                 const struct t_pbc *pbc_full,
                 const struct t_graph *g,
                 gmx_enerdata_t *enerd, t_nrnb *nrnb,
                 const real *lambda,
                 const t_mdatoms *md,
                 t_fcdata *fcd, int *global_atom_index,
                 const gmx::StepWorkload &stepWork)
{
    const t_pbc        *pbc_null;
    bonded_threading_t *bt = fr->bondedThreading;

    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    if (haveRestraints(*idef, *fcd))
    {
        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);

        if (idef->il[F_POSRES].nr > 0)
        {
            posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr,
                           &forceOutputs->forceWithVirial());
        }

        if (idef->il[F_FBPOSRES].nr > 0)
        {
            fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr,
                             &forceOutputs->forceWithVirial());
        }

        /* Do pre force calculation stuff which might require communication */
        if (fcd->orires.nr > 0)
        {
            /* This assertion is to ensure we have whole molecules.
             * Unfortunately we do not have an mdrun state variable that tells
             * us if molecules in x are not broken over PBC, so we have to make
             * do with checking graph!=nullptr, which should tell us if we made
             * molecules whole before calling the current function.
             */
            GMX_RELEASE_ASSERT(fr->ePBC == epbcNONE || g != nullptr, "With orientation restraints molecules should be whole");
            enerd->term[F_ORIRESDEV] =
                calc_orires_dev(ms, idef->il[F_ORIRES].nr,
                                idef->il[F_ORIRES].iatoms,
                                idef->iparams, md, x,
                                pbc_null, fcd, hist);
        }
        if (fcd->disres.nres > 0)
        {
            calc_disres_R_6(cr, ms,
                            idef->il[F_DISRES].nr,
                            idef->il[F_DISRES].iatoms,
                            x, pbc_null,
                            fcd, hist);
        }

        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
    }

    if (haveCpuBondeds(*fr))
    {
        gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, ewcsLISTED);
        /* The dummy array is to have a place to store the dhdl at other values
           of lambda, which will be thrown away in the end */
        real dvdl[efptNR] = {0};
        calcBondedForces(idef, x, fr, pbc_null, g,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()),
                         enerd, nrnb, lambda, dvdl, md,
                         fcd, stepWork, global_atom_index);
        wallcycle_sub_stop(wcycle, ewcsLISTED);

        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
        reduce_thread_output(fr->natoms_force, &forceWithShiftForces,
                             enerd->term, &enerd->grpp, dvdl,
                             bt,
                             stepWork);
        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
    }
}

void calc_listed_lambda(const t_idef *idef,
                        const rvec x[],
                        const t_forcerec *fr,
                        const struct t_pbc *pbc, const struct t_graph *g,
                        gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
                        const real *lambda,
                        const t_mdatoms *md,
                        t_fcdata *fcd,
                        int *global_atom_index)
{
    real          v;
    real          dvdl_dum[efptNR] = {0};
    rvec4        *f;
    rvec         *fshift;
    const t_pbc  *pbc_null;
    t_idef        idef_fe;
    WorkDivision &workDivision = fr->bondedThreading->foreignLambdaWorkDivision;

    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* Copy the whole idef, so we can modify the contents locally */
    idef_fe = *idef;

    /* We already have the forces, so we use temp buffers here */
    // TODO: Get rid of these allocations by using permanent force buffers
    snew(f, fr->natoms_force);
    snew(fshift, SHIFTS);

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const t_ilist &ilist = idef->il[ftype];
            /* Create a temporary t_ilist with only perturbed interactions */
            t_ilist       &ilist_fe = idef_fe.il[ftype];
            ilist_fe.iatoms          = ilist.iatoms + ilist.nr_nonperturbed;
            ilist_fe.nr_nonperturbed = 0;
            ilist_fe.nr              = ilist.nr - ilist.nr_nonperturbed;
            /* Set the work range of thread 0 to the perturbed bondeds */
            workDivision.setBound(ftype, 0, 0);
            workDivision.setBound(ftype, 1, ilist_fe.nr);
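            /* Because the topology is FE-sorted, all perturbed entries sit at
             * the end of iatoms; pointing the temporary ilist past the
             * non-perturbed part and giving all of it to thread 0 makes
             * calc_one_bond() below evaluate only the perturbed interactions,
             * serially. */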
            if (ilist_fe.nr > 0)
            {
                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                v = calc_one_bond(0, ftype, &idef_fe, workDivision,
                                  x, f, fshift, fr, pbc_null, g,
                                  grpp, nrnb, lambda, dvdl_dum,
                                  md, fcd, tempFlags,
                                  global_atom_index);
                epot[ftype] += v;
            }
        }
    }

    sfree(fshift);
    sfree(f);
}

void
do_force_listed(struct gmx_wallcycle     *wcycle,
                const matrix              box,
                const t_lambda           *fepvals,
                const t_commrec          *cr,
                const gmx_multisim_t     *ms,
                const t_idef             *idef,
                const rvec                x[],
                history_t                *hist,
                gmx::ForceOutputs        *forceOutputs,
                const t_forcerec         *fr,
                const struct t_pbc       *pbc,
                const struct t_graph     *graph,
                gmx_enerdata_t           *enerd,
                t_nrnb                   *nrnb,
                const real               *lambda,
                const t_mdatoms          *md,
                t_fcdata                 *fcd,
                int                      *global_atom_index,
                const gmx::StepWorkload  &stepWork)
{
    t_pbc pbc_full; /* Full PBC is needed for position restraints */

    if (!stepWork.computeListedForces)
    {
        return;
    }

    if ((idef->il[F_POSRES].nr > 0) ||
        (idef->il[F_FBPOSRES].nr > 0))
    {
        /* Not enough flops to bother counting */
        set_pbc(&pbc_full, fr->ePBC, box);
    }
    calc_listed(cr, ms, wcycle, idef, x, hist,
                forceOutputs,
                fr, pbc, &pbc_full,
                graph, enerd, nrnb, lambda, md, fcd,
                global_atom_index, stepWork);

    /* Check if we have to determine energy differences
     * at foreign lambda's.
     */
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);

        if (idef->ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (size_t i = 0; i < enerd->enerpart_lambda.size(); i++)
            {
                real lam_i[efptNR];

                reset_foreign_enerdata(enerd);
                for (int j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
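                /* Entry 0 of enerpart_lambda corresponds to the current lambda
                 * state; for i > 0 the lambda vector is taken from the foreign
                 * values in fepvals->all_lambda, shifted by one. */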
                calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
                                   fcd, global_atom_index);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);