2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief This file defines high-level functions for mdrun to compute
39 * energies and forces for listed interactions.
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \ingroup module_listed_forces
47 #include "listed_forces.h"
54 #include "gromacs/gmxlib/network.h"
55 #include "gromacs/gmxlib/nrnb.h"
56 #include "gromacs/listed_forces/bonded.h"
57 #include "gromacs/listed_forces/disre.h"
58 #include "gromacs/listed_forces/orires.h"
59 #include "gromacs/listed_forces/pairs.h"
60 #include "gromacs/listed_forces/position_restraints.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/mdlib/enerdata_utils.h"
63 #include "gromacs/mdlib/force.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/mdtypes/fcdata.h"
66 #include "gromacs/mdtypes/forceoutput.h"
67 #include "gromacs/mdtypes/forcerec.h"
68 #include "gromacs/mdtypes/inputrec.h"
69 #include "gromacs/mdtypes/md_enums.h"
70 #include "gromacs/mdtypes/simulation_workload.h"
71 #include "gromacs/pbcutil/ishift.h"
72 #include "gromacs/pbcutil/pbc.h"
73 #include "gromacs/timing/wallcycle.h"
74 #include "gromacs/topology/topology.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 #include "listed_internal.h"
80 #include "utilities.h"
87 /*! \brief Return true if ftype is an explicit pair-listed LJ or
88 * COULOMB interaction type: bonded LJ (usually 1-4), or special
89 * listed non-bonded for FEP. */
90 bool isPairInteraction(int ftype
)
92 return ((ftype
) >= F_LJ14
&& (ftype
) <= F_LJC_PAIRS_NB
);
95 /*! \brief Zero thread-local output buffers */
96 void zero_thread_output(f_thread_t
* f_t
)
98 constexpr int nelem_fa
= sizeof(f_t
->f
[0]) / sizeof(real
);
100 for (int i
= 0; i
< f_t
->nblock_used
; i
++)
102 int a0
= f_t
->block_index
[i
] * reduction_block_size
;
103 int a1
= a0
+ reduction_block_size
;
104 for (int a
= a0
; a
< a1
; a
++)
106 for (int d
= 0; d
< nelem_fa
; d
++)
113 for (int i
= 0; i
< SHIFTS
; i
++)
115 clear_rvec(f_t
->fshift
[i
]);
117 for (int i
= 0; i
< F_NRE
; i
++)
121 for (int i
= 0; i
< egNR
; i
++)
123 for (int j
= 0; j
< f_t
->grpp
.nener
; j
++)
125 f_t
->grpp
.ener
[i
][j
] = 0;
128 for (int i
= 0; i
< efptNR
; i
++)
134 /*! \brief The max thread number is arbitrary, we used a fixed number
135 * to avoid memory management. Using more than 16 threads is probably
136 * never useful performance wise. */
137 #define MAX_BONDED_THREADS 256
139 /*! \brief Reduce thread-local force buffers */
140 void reduce_thread_forces(int n
, gmx::ArrayRef
<gmx::RVec
> force
, const bonded_threading_t
* bt
, int nthreads
)
142 if (nthreads
> MAX_BONDED_THREADS
)
144 gmx_fatal(FARGS
, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS
);
147 rvec
* gmx_restrict f
= as_rvec_array(force
.data());
149 /* This reduction can run on any number of threads,
150 * independently of bt->nthreads.
151 * But if nthreads matches bt->nthreads (which it currently does)
152 * the uniform distribution of the touched blocks over nthreads will
153 * match the distribution of bonded over threads well in most cases,
154 * which means that threads mostly reduce their own data which increases
155 * the number of cache hits.
157 #pragma omp parallel for num_threads(nthreads) schedule(static)
158 for (int b
= 0; b
< bt
->nblock_used
; b
++)
162 int ind
= bt
->block_index
[b
];
163 rvec4
* fp
[MAX_BONDED_THREADS
];
165 /* Determine which threads contribute to this block */
167 for (int ft
= 0; ft
< bt
->nthreads
; ft
++)
169 if (bitmask_is_set(bt
->mask
[ind
], ft
))
171 fp
[nfb
++] = bt
->f_t
[ft
]->f
;
176 /* Reduce force buffers for threads that contribute */
177 int a0
= ind
* reduction_block_size
;
178 int a1
= (ind
+ 1) * reduction_block_size
;
179 /* It would be nice if we could pad f to avoid this min */
180 a1
= std::min(a1
, n
);
181 for (int a
= a0
; a
< a1
; a
++)
183 for (int fb
= 0; fb
< nfb
; fb
++)
185 rvec_inc(f
[a
], fp
[fb
][a
]);
190 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
194 /*! \brief Reduce thread-local forces, shift forces and energies */
195 void reduce_thread_output(int n
,
196 gmx::ForceWithShiftForces
* forceWithShiftForces
,
198 gmx_grppairener_t
* grpp
,
200 const bonded_threading_t
* bt
,
201 const gmx::StepWorkload
& stepWork
)
203 assert(bt
->haveBondeds
);
205 if (bt
->nblock_used
> 0)
207 /* Reduce the bonded force buffer */
208 reduce_thread_forces(n
, forceWithShiftForces
->force(), bt
, bt
->nthreads
);
211 rvec
* gmx_restrict fshift
= as_rvec_array(forceWithShiftForces
->shiftForces().data());
213 /* When necessary, reduce energy and virial using one thread only */
214 if ((stepWork
.computeEnergy
|| stepWork
.computeVirial
|| stepWork
.computeDhdl
) && bt
->nthreads
> 1)
216 gmx::ArrayRef
<const std::unique_ptr
<f_thread_t
>> f_t
= bt
->f_t
;
218 if (stepWork
.computeVirial
)
220 for (int i
= 0; i
< SHIFTS
; i
++)
222 for (int t
= 1; t
< bt
->nthreads
; t
++)
224 rvec_inc(fshift
[i
], f_t
[t
]->fshift
[i
]);
228 if (stepWork
.computeEnergy
)
230 for (int i
= 0; i
< F_NRE
; i
++)
232 for (int t
= 1; t
< bt
->nthreads
; t
++)
234 ener
[i
] += f_t
[t
]->ener
[i
];
237 for (int i
= 0; i
< egNR
; i
++)
239 for (int j
= 0; j
< f_t
[1]->grpp
.nener
; j
++)
241 for (int t
= 1; t
< bt
->nthreads
; t
++)
243 grpp
->ener
[i
][j
] += f_t
[t
]->grpp
.ener
[i
][j
];
248 if (stepWork
.computeDhdl
)
250 for (int i
= 0; i
< efptNR
; i
++)
253 for (int t
= 1; t
< bt
->nthreads
; t
++)
255 dvdl
[i
] += f_t
[t
]->dvdl
[i
];
262 /*! \brief Returns the bonded kernel flavor
264 * Note that energies are always requested when the virial
265 * is requested (performance gain would be small).
266 * Note that currently we do not have bonded kernels that
267 * do not compute forces.
269 BondedKernelFlavor
selectBondedKernelFlavor(const gmx::StepWorkload
& stepWork
,
270 const bool useSimdKernels
,
271 const bool havePerturbedInteractions
)
273 BondedKernelFlavor flavor
;
274 if (stepWork
.computeEnergy
|| stepWork
.computeVirial
)
276 if (stepWork
.computeVirial
)
278 flavor
= BondedKernelFlavor::ForcesAndVirialAndEnergy
;
282 flavor
= BondedKernelFlavor::ForcesAndEnergy
;
287 if (useSimdKernels
&& !havePerturbedInteractions
)
289 flavor
= BondedKernelFlavor::ForcesSimdWhenAvailable
;
293 flavor
= BondedKernelFlavor::ForcesNoSimd
;
300 /*! \brief Calculate one element of the list of bonded interactions
302 real
calc_one_bond(int thread
,
304 const InteractionDefinitions
& idef
,
305 ArrayRef
<const int> iatoms
,
306 const int numNonperturbedInteractions
,
307 const WorkDivision
& workDivision
,
311 const t_forcerec
* fr
,
313 gmx_grppairener_t
* grpp
,
319 const gmx::StepWorkload
& stepWork
,
320 int* global_atom_index
)
322 GMX_ASSERT(idef
.ilsort
== ilsortNO_FE
|| idef
.ilsort
== ilsortFE_SORTED
,
323 "The topology should be marked either as no FE or sorted on FE");
325 const bool havePerturbedInteractions
=
326 (idef
.ilsort
== ilsortFE_SORTED
&& numNonperturbedInteractions
< iatoms
.ssize());
327 BondedKernelFlavor flavor
=
328 selectBondedKernelFlavor(stepWork
, fr
->use_simd_kernels
, havePerturbedInteractions
);
330 if (IS_RESTRAINT_TYPE(ftype
))
332 efptFTYPE
= efptRESTRAINT
;
336 efptFTYPE
= efptBONDED
;
339 const int nat1
= interaction_function
[ftype
].nratoms
+ 1;
340 const int nbonds
= iatoms
.ssize() / nat1
;
342 GMX_ASSERT(fr
->gpuBonded
!= nullptr || workDivision
.end(ftype
) == iatoms
.ssize(),
343 "The thread division should match the topology");
345 const int nb0
= workDivision
.bound(ftype
, thread
);
346 const int nbn
= workDivision
.bound(ftype
, thread
+ 1) - nb0
;
348 ArrayRef
<const t_iparams
> iparams
= idef
.iparams
;
351 if (!isPairInteraction(ftype
))
355 /* TODO The execution time for CMAP dihedrals might be
356 nice to account to its own subtimer, but first
357 wallcycle needs to be extended to support calling from
359 v
= cmap_dihs(nbn
, iatoms
.data() + nb0
, iparams
.data(), &idef
.cmap_grid
, x
, f
, fshift
,
360 pbc
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]), md
, fcd
, global_atom_index
);
364 v
= calculateSimpleBond(ftype
, nbn
, iatoms
.data() + nb0
, iparams
.data(), x
, f
, fshift
,
365 pbc
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]), md
, fcd
,
366 global_atom_index
, flavor
);
371 /* TODO The execution time for pairs might be nice to account
372 to its own subtimer, but first wallcycle needs to be
373 extended to support calling from multiple threads. */
374 do_pairs(ftype
, nbn
, iatoms
.data() + nb0
, iparams
.data(), x
, f
, fshift
, pbc
, lambda
, dvdl
,
375 md
, fr
, havePerturbedInteractions
, stepWork
, grpp
, global_atom_index
);
380 inc_nrnb(nrnb
, nrnbIndex(ftype
), nbonds
);
388 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
390 static void calcBondedForces(const InteractionDefinitions
& idef
,
392 const t_forcerec
* fr
,
393 const t_pbc
* pbc_null
,
394 rvec
* fshiftMasterBuffer
,
395 gmx_enerdata_t
* enerd
,
401 const gmx::StepWorkload
& stepWork
,
402 int* global_atom_index
)
404 bonded_threading_t
* bt
= fr
->bondedThreading
;
406 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
407 for (int thread
= 0; thread
< bt
->nthreads
; thread
++)
411 f_thread_t
& threadBuffers
= *bt
->f_t
[thread
];
417 gmx_grppairener_t
* grpp
;
419 zero_thread_output(&threadBuffers
);
421 rvec4
* ft
= threadBuffers
.f
;
423 /* Thread 0 writes directly to the main output buffers.
424 * We might want to reconsider this.
428 fshift
= fshiftMasterBuffer
;
435 fshift
= threadBuffers
.fshift
;
436 epot
= threadBuffers
.ener
;
437 grpp
= &threadBuffers
.grpp
;
438 dvdlt
= threadBuffers
.dvdl
;
440 /* Loop over all bonded force types to calculate the bonded forces */
441 for (ftype
= 0; (ftype
< F_NRE
); ftype
++)
443 const InteractionList
& ilist
= idef
.il
[ftype
];
444 if (!ilist
.empty() && ftype_is_bonded_potential(ftype
))
446 ArrayRef
<const int> iatoms
= gmx::makeConstArrayRef(ilist
.iatoms
);
447 v
= calc_one_bond(thread
, ftype
, idef
, iatoms
, idef
.numNonperturbedInteractions
[ftype
],
448 fr
->bondedThreading
->workDivision
, x
, ft
, fshift
, fr
, pbc_null
,
449 grpp
, nrnb
, lambda
, dvdlt
, md
, fcd
, stepWork
, global_atom_index
);
454 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
458 bool haveRestraints(const InteractionDefinitions
& idef
, const t_fcdata
& fcd
)
460 return (!idef
.il
[F_POSRES
].empty() || !idef
.il
[F_FBPOSRES
].empty() || fcd
.orires
.nr
> 0
461 || fcd
.disres
.nres
> 0);
464 bool haveCpuBondeds(const t_forcerec
& fr
)
466 return fr
.bondedThreading
->haveBondeds
;
469 bool haveCpuListedForces(const t_forcerec
& fr
, const InteractionDefinitions
& idef
, const t_fcdata
& fcd
)
471 return haveCpuBondeds(fr
) || haveRestraints(idef
, fcd
);
477 /*! \brief Calculates all listed force interactions. */
478 void calc_listed(struct gmx_wallcycle
* wcycle
,
479 const InteractionDefinitions
& idef
,
481 gmx::ForceOutputs
* forceOutputs
,
482 const t_forcerec
* fr
,
484 gmx_enerdata_t
* enerd
,
489 int* global_atom_index
,
490 const gmx::StepWorkload
& stepWork
)
492 bonded_threading_t
* bt
= fr
->bondedThreading
;
494 if (haveCpuBondeds(*fr
))
496 gmx::ForceWithShiftForces
& forceWithShiftForces
= forceOutputs
->forceWithShiftForces();
498 wallcycle_sub_start(wcycle
, ewcsLISTED
);
499 /* The dummy array is to have a place to store the dhdl at other values
500 of lambda, which will be thrown away in the end */
501 real dvdl
[efptNR
] = { 0 };
502 calcBondedForces(idef
, x
, fr
, fr
->bMolPBC
? pbc
: nullptr,
503 as_rvec_array(forceWithShiftForces
.shiftForces().data()), enerd
, nrnb
,
504 lambda
, dvdl
, md
, fcd
, stepWork
, global_atom_index
);
505 wallcycle_sub_stop(wcycle
, ewcsLISTED
);
507 wallcycle_sub_start(wcycle
, ewcsLISTED_BUF_OPS
);
508 reduce_thread_output(fr
->natoms_force
, &forceWithShiftForces
, enerd
->term
, &enerd
->grpp
,
511 if (stepWork
.computeDhdl
)
513 for (int i
= 0; i
< efptNR
; i
++)
515 enerd
->dvdl_nonlin
[i
] += dvdl
[i
];
518 wallcycle_sub_stop(wcycle
, ewcsLISTED_BUF_OPS
);
521 /* Copy the sum of violations for the distance restraints from fcd */
524 enerd
->term
[F_DISRESVIOL
] = fcd
->disres
.sumviol
;
528 /*! \brief As calc_listed(), but only determines the potential energy
529 * for the perturbed interactions.
531 * The shift forces in fr are not affected.
533 void calc_listed_lambda(const InteractionDefinitions
& idef
,
535 const t_forcerec
* fr
,
536 const struct t_pbc
* pbc
,
537 gmx_grppairener_t
* grpp
,
539 gmx::ArrayRef
<real
> dvdl
,
544 int* global_atom_index
)
549 const t_pbc
* pbc_null
;
550 WorkDivision
& workDivision
= fr
->bondedThreading
->foreignLambdaWorkDivision
;
561 /* We already have the forces, so we use temp buffers here */
562 // TODO: Get rid of these allocations by using permanent force buffers
563 snew(f
, fr
->natoms_force
);
564 snew(fshift
, SHIFTS
);
566 /* Loop over all bonded force types to calculate the bonded energies */
567 for (int ftype
= 0; (ftype
< F_NRE
); ftype
++)
569 if (ftype_is_bonded_potential(ftype
))
571 const InteractionList
& ilist
= idef
.il
[ftype
];
572 /* Create a temporary iatom list with only perturbed interactions */
573 const int numNonperturbed
= idef
.numNonperturbedInteractions
[ftype
];
574 ArrayRef
<const int> iatomsPerturbed
= gmx::constArrayRefFromArray(
575 ilist
.iatoms
.data() + numNonperturbed
, ilist
.size() - numNonperturbed
);
576 if (!iatomsPerturbed
.empty())
578 /* Set the work range of thread 0 to the perturbed bondeds */
579 workDivision
.setBound(ftype
, 0, 0);
580 workDivision
.setBound(ftype
, 1, iatomsPerturbed
.ssize());
582 gmx::StepWorkload tempFlags
;
583 tempFlags
.computeEnergy
= true;
584 v
= calc_one_bond(0, ftype
, idef
, iatomsPerturbed
, iatomsPerturbed
.ssize(),
585 workDivision
, x
, f
, fshift
, fr
, pbc_null
, grpp
, nrnb
, lambda
,
586 dvdl
.data(), md
, fcd
, tempFlags
, global_atom_index
);
598 void do_force_listed(struct gmx_wallcycle
* wcycle
,
600 const t_lambda
* fepvals
,
602 const gmx_multisim_t
* ms
,
603 const InteractionDefinitions
& idef
,
605 gmx::ArrayRef
<const gmx::RVec
> xWholeMolecules
,
607 gmx::ForceOutputs
* forceOutputs
,
608 const t_forcerec
* fr
,
609 const struct t_pbc
* pbc
,
610 gmx_enerdata_t
* enerd
,
615 int* global_atom_index
,
616 const gmx::StepWorkload
& stepWork
)
618 if (!stepWork
.computeListedForces
)
623 t_pbc pbc_full
; /* Full PBC is needed for position restraints */
624 if (haveRestraints(idef
, *fcd
))
626 if (!idef
.il
[F_POSRES
].empty() || !idef
.il
[F_FBPOSRES
].empty())
628 /* Not enough flops to bother counting */
629 set_pbc(&pbc_full
, fr
->pbcType
, box
);
632 /* TODO Use of restraints triggers further function calls
633 inside the loop over calc_one_bond(), but those are too
634 awkward to account to this subtimer properly in the present
635 code. We don't test / care much about performance with
636 restraints, anyway. */
637 wallcycle_sub_start(wcycle
, ewcsRESTRAINTS
);
639 if (!idef
.il
[F_POSRES
].empty())
641 posres_wrapper(nrnb
, idef
, &pbc_full
, x
, enerd
, lambda
, fr
, &forceOutputs
->forceWithVirial());
644 if (!idef
.il
[F_FBPOSRES
].empty())
646 fbposres_wrapper(nrnb
, idef
, &pbc_full
, x
, enerd
, fr
, &forceOutputs
->forceWithVirial());
649 /* Do pre force calculation stuff which might require communication */
650 if (fcd
->orires
.nr
> 0)
652 GMX_ASSERT(!xWholeMolecules
.empty(), "Need whole molecules for orienation restraints");
653 enerd
->term
[F_ORIRESDEV
] = calc_orires_dev(
654 ms
, idef
.il
[F_ORIRES
].size(), idef
.il
[F_ORIRES
].iatoms
.data(), idef
.iparams
.data(),
655 md
, xWholeMolecules
, x
, fr
->bMolPBC
? pbc
: nullptr, fcd
, hist
);
657 if (fcd
->disres
.nres
> 0)
659 calc_disres_R_6(cr
, ms
, idef
.il
[F_DISRES
].size(), idef
.il
[F_DISRES
].iatoms
.data(), x
,
660 fr
->bMolPBC
? pbc
: nullptr, fcd
, hist
);
663 wallcycle_sub_stop(wcycle
, ewcsRESTRAINTS
);
666 calc_listed(wcycle
, idef
, x
, forceOutputs
, fr
, pbc
, enerd
, nrnb
, lambda
, md
, fcd
,
667 global_atom_index
, stepWork
);
669 /* Check if we have to determine energy differences
670 * at foreign lambda's.
672 if (fepvals
->n_lambda
> 0 && stepWork
.computeDhdl
)
674 real dvdl
[efptNR
] = { 0 };
675 if (!idef
.il
[F_POSRES
].empty())
677 posres_wrapper_lambda(wcycle
, fepvals
, idef
, &pbc_full
, x
, enerd
, lambda
, fr
);
679 if (idef
.ilsort
!= ilsortNO_FE
)
681 wallcycle_sub_start(wcycle
, ewcsLISTED_FEP
);
682 if (idef
.ilsort
!= ilsortFE_SORTED
)
684 gmx_incons("The bonded interactions are not sorted for free energy");
686 for (size_t i
= 0; i
< enerd
->enerpart_lambda
.size(); i
++)
690 reset_foreign_enerdata(enerd
);
691 for (int j
= 0; j
< efptNR
; j
++)
693 lam_i
[j
] = (i
== 0 ? lambda
[j
] : fepvals
->all_lambda
[j
][i
- 1]);
695 calc_listed_lambda(idef
, x
, fr
, pbc
, &(enerd
->foreign_grpp
), enerd
->foreign_term
,
696 dvdl
, nrnb
, lam_i
, md
, fcd
, global_atom_index
);
697 sum_epot(&(enerd
->foreign_grpp
), enerd
->foreign_term
);
698 enerd
->enerpart_lambda
[i
] += enerd
->foreign_term
[F_EPOT
];
699 for (int j
= 0; j
< efptNR
; j
++)
701 enerd
->dhdlLambda
[i
] += dvdl
[j
];
705 wallcycle_sub_stop(wcycle
, ewcsLISTED_FEP
);