2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief This file defines high-level functions for mdrun to compute
38 * energies and forces for listed interactions.
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed_forces
46 #include "listed_forces.h"
53 #include "gromacs/gmxlib/network.h"
54 #include "gromacs/gmxlib/nrnb.h"
55 #include "gromacs/listed_forces/bonded.h"
56 #include "gromacs/listed_forces/disre.h"
57 #include "gromacs/listed_forces/orires.h"
58 #include "gromacs/listed_forces/pairs.h"
59 #include "gromacs/listed_forces/position_restraints.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/mdlib/enerdata_utils.h"
62 #include "gromacs/mdlib/force.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/fcdata.h"
65 #include "gromacs/mdtypes/forcerec.h"
66 #include "gromacs/mdtypes/inputrec.h"
67 #include "gromacs/mdtypes/md_enums.h"
68 #include "gromacs/mdtypes/simulation_workload.h"
69 #include "gromacs/pbcutil/ishift.h"
70 #include "gromacs/pbcutil/pbc.h"
71 #include "gromacs/timing/wallcycle.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
77 #include "listed_internal.h"
78 #include "utilities.h"
83 /*! \brief Return true if ftype is an explicit pair-listed LJ or
84 * COULOMB interaction type: bonded LJ (usually 1-4), or special
85 * listed non-bonded for FEP. */
87 isPairInteraction(int ftype
)
89 return ((ftype
) >= F_LJ14
&& (ftype
) <= F_LJC_PAIRS_NB
);
92 /*! \brief Zero thread-local output buffers */
94 zero_thread_output(f_thread_t
*f_t
)
96 constexpr int nelem_fa
= sizeof(f_t
->f
[0])/sizeof(real
);
98 for (int i
= 0; i
< f_t
->nblock_used
; i
++)
100 int a0
= f_t
->block_index
[i
]*reduction_block_size
;
101 int a1
= a0
+ reduction_block_size
;
102 for (int a
= a0
; a
< a1
; a
++)
104 for (int d
= 0; d
< nelem_fa
; d
++)
111 for (int i
= 0; i
< SHIFTS
; i
++)
113 clear_rvec(f_t
->fshift
[i
]);
115 for (int i
= 0; i
< F_NRE
; i
++)
119 for (int i
= 0; i
< egNR
; i
++)
121 for (int j
= 0; j
< f_t
->grpp
.nener
; j
++)
123 f_t
->grpp
.ener
[i
][j
] = 0;
126 for (int i
= 0; i
< efptNR
; i
++)
132 /*! \brief The max thread number is arbitrary, we used a fixed number
133 * to avoid memory management. Using more than 16 threads is probably
134 * never useful performance wise. */
135 #define MAX_BONDED_THREADS 256
137 /*! \brief Reduce thread-local force buffers */
139 reduce_thread_forces(int n
,
140 gmx::ArrayRef
<gmx::RVec
> force
,
141 const bonded_threading_t
*bt
,
144 if (nthreads
> MAX_BONDED_THREADS
)
146 gmx_fatal(FARGS
, "Can not reduce bonded forces on more than %d threads",
150 rvec
* gmx_restrict f
= as_rvec_array(force
.data());
152 /* This reduction can run on any number of threads,
153 * independently of bt->nthreads.
154 * But if nthreads matches bt->nthreads (which it currently does)
155 * the uniform distribution of the touched blocks over nthreads will
156 * match the distribution of bonded over threads well in most cases,
157 * which means that threads mostly reduce their own data which increases
158 * the number of cache hits.
160 #pragma omp parallel for num_threads(nthreads) schedule(static)
161 for (int b
= 0; b
< bt
->nblock_used
; b
++)
165 int ind
= bt
->block_index
[b
];
166 rvec4
*fp
[MAX_BONDED_THREADS
];
168 /* Determine which threads contribute to this block */
170 for (int ft
= 0; ft
< bt
->nthreads
; ft
++)
172 if (bitmask_is_set(bt
->mask
[ind
], ft
))
174 fp
[nfb
++] = bt
->f_t
[ft
]->f
;
179 /* Reduce force buffers for threads that contribute */
180 int a0
= ind
*reduction_block_size
;
181 int a1
= (ind
+ 1)*reduction_block_size
;
182 /* It would be nice if we could pad f to avoid this min */
183 a1
= std::min(a1
, n
);
184 for (int a
= a0
; a
< a1
; a
++)
186 for (int fb
= 0; fb
< nfb
; fb
++)
188 rvec_inc(f
[a
], fp
[fb
][a
]);
193 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
197 /*! \brief Reduce thread-local forces, shift forces and energies */
199 reduce_thread_output(int n
, gmx::ForceWithShiftForces
*forceWithShiftForces
,
200 real
*ener
, gmx_grppairener_t
*grpp
, real
*dvdl
,
201 const bonded_threading_t
*bt
,
202 const gmx::StepWorkload
&stepWork
)
204 assert(bt
->haveBondeds
);
206 if (bt
->nblock_used
> 0)
208 /* Reduce the bonded force buffer */
209 reduce_thread_forces(n
, forceWithShiftForces
->force(), bt
, bt
->nthreads
);
212 rvec
* gmx_restrict fshift
= as_rvec_array(forceWithShiftForces
->shiftForces().data());
214 /* When necessary, reduce energy and virial using one thread only */
215 if ((stepWork
.computeEnergy
|| stepWork
.computeVirial
|| stepWork
.computeDhdl
) &&
218 gmx::ArrayRef
< const std::unique_ptr
< f_thread_t
>> f_t
= bt
->f_t
;
220 if (stepWork
.computeVirial
)
222 for (int i
= 0; i
< SHIFTS
; i
++)
224 for (int t
= 1; t
< bt
->nthreads
; t
++)
226 rvec_inc(fshift
[i
], f_t
[t
]->fshift
[i
]);
230 if (stepWork
.computeEnergy
)
232 for (int i
= 0; i
< F_NRE
; i
++)
234 for (int t
= 1; t
< bt
->nthreads
; t
++)
236 ener
[i
] += f_t
[t
]->ener
[i
];
239 for (int i
= 0; i
< egNR
; i
++)
241 for (int j
= 0; j
< f_t
[1]->grpp
.nener
; j
++)
243 for (int t
= 1; t
< bt
->nthreads
; t
++)
245 grpp
->ener
[i
][j
] += f_t
[t
]->grpp
.ener
[i
][j
];
250 if (stepWork
.computeDhdl
)
252 for (int i
= 0; i
< efptNR
; i
++)
255 for (int t
= 1; t
< bt
->nthreads
; t
++)
257 dvdl
[i
] += f_t
[t
]->dvdl
[i
];
264 /*! \brief Returns the bonded kernel flavor
266 * Note that energies are always requested when the virial
267 * is requested (performance gain would be small).
268 * Note that currently we do not have bonded kernels that
269 * do not compute forces.
271 BondedKernelFlavor
selectBondedKernelFlavor(const gmx::StepWorkload
&stepWork
,
272 const bool useSimdKernels
,
273 const bool havePerturbedInteractions
)
275 BondedKernelFlavor flavor
;
276 if (stepWork
.computeEnergy
|| stepWork
.computeVirial
)
278 if (stepWork
.computeVirial
)
280 flavor
= BondedKernelFlavor::ForcesAndVirialAndEnergy
;
284 flavor
= BondedKernelFlavor::ForcesAndEnergy
;
289 if (useSimdKernels
&& !havePerturbedInteractions
)
291 flavor
= BondedKernelFlavor::ForcesSimdWhenAvailable
;
295 flavor
= BondedKernelFlavor::ForcesNoSimd
;
302 /*! \brief Calculate one element of the list of bonded interactions
305 calc_one_bond(int thread
,
306 int ftype
, const t_idef
*idef
,
307 const WorkDivision
&workDivision
,
308 const rvec x
[], rvec4 f
[], rvec fshift
[],
309 const t_forcerec
*fr
,
310 const t_pbc
*pbc
, const t_graph
*g
,
311 gmx_grppairener_t
*grpp
,
313 const real
*lambda
, real
*dvdl
,
314 const t_mdatoms
*md
, t_fcdata
*fcd
,
315 const gmx::StepWorkload
&stepWork
,
316 int *global_atom_index
)
318 GMX_ASSERT(idef
->ilsort
== ilsortNO_FE
|| idef
->ilsort
== ilsortFE_SORTED
,
319 "The topology should be marked either as no FE or sorted on FE");
321 const bool havePerturbedInteractions
=
322 (idef
->ilsort
== ilsortFE_SORTED
&&
323 idef
->il
[ftype
].nr_nonperturbed
< idef
->il
[ftype
].nr
);
324 BondedKernelFlavor flavor
=
325 selectBondedKernelFlavor(stepWork
, fr
->use_simd_kernels
, havePerturbedInteractions
);
327 if (IS_RESTRAINT_TYPE(ftype
))
329 efptFTYPE
= efptRESTRAINT
;
333 efptFTYPE
= efptBONDED
;
336 const int nat1
= interaction_function
[ftype
].nratoms
+ 1;
337 const int nbonds
= idef
->il
[ftype
].nr
/nat1
;
338 const t_iatom
*iatoms
= idef
->il
[ftype
].iatoms
;
340 GMX_ASSERT(fr
->gpuBonded
!= nullptr || workDivision
.end(ftype
) == idef
->il
[ftype
].nr
,
341 "The thread division should match the topology");
343 const int nb0
= workDivision
.bound(ftype
, thread
);
344 const int nbn
= workDivision
.bound(ftype
, thread
+ 1) - nb0
;
347 if (!isPairInteraction(ftype
))
351 /* TODO The execution time for CMAP dihedrals might be
352 nice to account to its own subtimer, but first
353 wallcycle needs to be extended to support calling from
355 v
= cmap_dihs(nbn
, iatoms
+nb0
,
356 idef
->iparams
, idef
->cmap_grid
,
358 pbc
, g
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]),
359 md
, fcd
, global_atom_index
);
363 v
= calculateSimpleBond(ftype
, nbn
, iatoms
+ nb0
,
366 pbc
, g
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]),
367 md
, fcd
, global_atom_index
,
373 /* TODO The execution time for pairs might be nice to account
374 to its own subtimer, but first wallcycle needs to be
375 extended to support calling from multiple threads. */
376 do_pairs(ftype
, nbn
, iatoms
+nb0
, idef
->iparams
, x
, f
, fshift
,
377 pbc
, g
, lambda
, dvdl
, md
, fr
,
378 havePerturbedInteractions
, stepWork
,
379 grpp
, global_atom_index
);
384 inc_nrnb(nrnb
, nrnbIndex(ftype
), nbonds
);
392 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
395 calcBondedForces(const t_idef
*idef
,
397 const t_forcerec
*fr
,
398 const t_pbc
*pbc_null
,
400 rvec
*fshiftMasterBuffer
,
401 gmx_enerdata_t
*enerd
,
407 const gmx::StepWorkload
&stepWork
,
408 int *global_atom_index
)
410 bonded_threading_t
*bt
= fr
->bondedThreading
;
412 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
413 for (int thread
= 0; thread
< bt
->nthreads
; thread
++)
417 f_thread_t
&threadBuffers
= *bt
->f_t
[thread
];
423 gmx_grppairener_t
*grpp
;
425 zero_thread_output(&threadBuffers
);
427 rvec4
*ft
= threadBuffers
.f
;
429 /* Thread 0 writes directly to the main output buffers.
430 * We might want to reconsider this.
434 fshift
= fshiftMasterBuffer
;
441 fshift
= threadBuffers
.fshift
;
442 epot
= threadBuffers
.ener
;
443 grpp
= &threadBuffers
.grpp
;
444 dvdlt
= threadBuffers
.dvdl
;
446 /* Loop over all bonded force types to calculate the bonded forces */
447 for (ftype
= 0; (ftype
< F_NRE
); ftype
++)
449 if (idef
->il
[ftype
].nr
> 0 && ftype_is_bonded_potential(ftype
))
451 v
= calc_one_bond(thread
, ftype
, idef
,
452 fr
->bondedThreading
->workDivision
, x
,
453 ft
, fshift
, fr
, pbc_null
, g
, grpp
,
461 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
465 bool haveRestraints(const t_idef
&idef
,
469 ((idef
.il
[F_POSRES
].nr
> 0) ||
470 (idef
.il
[F_FBPOSRES
].nr
> 0) ||
472 fcd
.disres
.nres
> 0);
475 bool haveCpuBondeds(const t_forcerec
&fr
)
477 return fr
.bondedThreading
->haveBondeds
;
480 bool haveCpuListedForces(const t_forcerec
&fr
,
484 return haveCpuBondeds(fr
) || haveRestraints(idef
, fcd
);
487 void calc_listed(const t_commrec
*cr
,
488 const gmx_multisim_t
*ms
,
489 struct gmx_wallcycle
*wcycle
,
491 const rvec x
[], history_t
*hist
,
492 gmx::ForceOutputs
*forceOutputs
,
493 const t_forcerec
*fr
,
494 const struct t_pbc
*pbc
,
495 const struct t_pbc
*pbc_full
,
496 const struct t_graph
*g
,
497 gmx_enerdata_t
*enerd
, t_nrnb
*nrnb
,
500 t_fcdata
*fcd
, int *global_atom_index
,
501 const gmx::StepWorkload
&stepWork
)
503 const t_pbc
*pbc_null
;
504 bonded_threading_t
*bt
= fr
->bondedThreading
;
515 if (haveRestraints(*idef
, *fcd
))
517 /* TODO Use of restraints triggers further function calls
518 inside the loop over calc_one_bond(), but those are too
519 awkward to account to this subtimer properly in the present
520 code. We don't test / care much about performance with
521 restraints, anyway. */
522 wallcycle_sub_start(wcycle
, ewcsRESTRAINTS
);
524 if (idef
->il
[F_POSRES
].nr
> 0)
526 posres_wrapper(nrnb
, idef
, pbc_full
, x
, enerd
, lambda
, fr
,
527 &forceOutputs
->forceWithVirial());
530 if (idef
->il
[F_FBPOSRES
].nr
> 0)
532 fbposres_wrapper(nrnb
, idef
, pbc_full
, x
, enerd
, fr
,
533 &forceOutputs
->forceWithVirial());
536 /* Do pre force calculation stuff which might require communication */
537 if (fcd
->orires
.nr
> 0)
539 /* This assertion is to ensure we have whole molecules.
540 * Unfortunately we do not have an mdrun state variable that tells
541 * us if molecules in x are not broken over PBC, so we have to make
542 * do with checking graph!=nullptr, which should tell us if we made
543 * molecules whole before calling the current function.
545 GMX_RELEASE_ASSERT(fr
->ePBC
== epbcNONE
|| g
!= nullptr, "With orientation restraints molecules should be whole");
546 enerd
->term
[F_ORIRESDEV
] =
547 calc_orires_dev(ms
, idef
->il
[F_ORIRES
].nr
,
548 idef
->il
[F_ORIRES
].iatoms
,
549 idef
->iparams
, md
, x
,
550 pbc_null
, fcd
, hist
);
552 if (fcd
->disres
.nres
> 0)
554 calc_disres_R_6(cr
, ms
,
555 idef
->il
[F_DISRES
].nr
,
556 idef
->il
[F_DISRES
].iatoms
,
561 wallcycle_sub_stop(wcycle
, ewcsRESTRAINTS
);
564 if (haveCpuBondeds(*fr
))
566 gmx::ForceWithShiftForces
&forceWithShiftForces
= forceOutputs
->forceWithShiftForces();
568 wallcycle_sub_start(wcycle
, ewcsLISTED
);
569 /* The dummy array is to have a place to store the dhdl at other values
570 of lambda, which will be thrown away in the end */
571 real dvdl
[efptNR
] = {0};
572 calcBondedForces(idef
, x
, fr
, pbc_null
, g
,
573 as_rvec_array(forceWithShiftForces
.shiftForces().data()),
574 enerd
, nrnb
, lambda
, dvdl
, md
,
575 fcd
, stepWork
, global_atom_index
);
576 wallcycle_sub_stop(wcycle
, ewcsLISTED
);
578 wallcycle_sub_start(wcycle
, ewcsLISTED_BUF_OPS
);
579 reduce_thread_output(fr
->natoms_force
, &forceWithShiftForces
,
580 enerd
->term
, &enerd
->grpp
, dvdl
,
584 if (stepWork
.computeDhdl
)
586 for (int i
= 0; i
< efptNR
; i
++)
588 enerd
->dvdl_nonlin
[i
] += dvdl
[i
];
591 wallcycle_sub_stop(wcycle
, ewcsLISTED_BUF_OPS
);
594 /* Copy the sum of violations for the distance restraints from fcd */
597 enerd
->term
[F_DISRESVIOL
] = fcd
->disres
.sumviol
;
601 void calc_listed_lambda(const t_idef
*idef
,
603 const t_forcerec
*fr
,
604 const struct t_pbc
*pbc
, const struct t_graph
*g
,
605 gmx_grppairener_t
*grpp
, real
*epot
, t_nrnb
*nrnb
,
609 int *global_atom_index
)
612 real dvdl_dum
[efptNR
] = {0};
615 const t_pbc
*pbc_null
;
617 WorkDivision
&workDivision
= fr
->bondedThreading
->foreignLambdaWorkDivision
;
628 /* Copy the whole idef, so we can modify the contents locally */
631 /* We already have the forces, so we use temp buffers here */
632 // TODO: Get rid of these allocations by using permanent force buffers
633 snew(f
, fr
->natoms_force
);
634 snew(fshift
, SHIFTS
);
636 /* Loop over all bonded force types to calculate the bonded energies */
637 for (int ftype
= 0; (ftype
< F_NRE
); ftype
++)
639 if (ftype_is_bonded_potential(ftype
))
641 const t_ilist
&ilist
= idef
->il
[ftype
];
642 /* Create a temporary t_ilist with only perturbed interactions */
643 t_ilist
&ilist_fe
= idef_fe
.il
[ftype
];
644 ilist_fe
.iatoms
= ilist
.iatoms
+ ilist
.nr_nonperturbed
;
645 ilist_fe
.nr_nonperturbed
= 0;
646 ilist_fe
.nr
= ilist
.nr
- ilist
.nr_nonperturbed
;
647 /* Set the work range of thread 0 to the perturbed bondeds */
648 workDivision
.setBound(ftype
, 0, 0);
649 workDivision
.setBound(ftype
, 1, ilist_fe
.nr
);
653 gmx::StepWorkload tempFlags
;
654 tempFlags
.computeEnergy
= true;
655 v
= calc_one_bond(0, ftype
, &idef_fe
, workDivision
,
656 x
, f
, fshift
, fr
, pbc_null
, g
,
657 grpp
, nrnb
, lambda
, dvdl_dum
,
670 do_force_listed(struct gmx_wallcycle
*wcycle
,
672 const t_lambda
*fepvals
,
674 const gmx_multisim_t
*ms
,
678 gmx::ForceOutputs
*forceOutputs
,
679 const t_forcerec
*fr
,
680 const struct t_pbc
*pbc
,
681 const struct t_graph
*graph
,
682 gmx_enerdata_t
*enerd
,
687 int *global_atom_index
,
688 const gmx::StepWorkload
&stepWork
)
690 t_pbc pbc_full
; /* Full PBC is needed for position restraints */
692 if (!stepWork
.computeListedForces
)
697 if ((idef
->il
[F_POSRES
].nr
> 0) ||
698 (idef
->il
[F_FBPOSRES
].nr
> 0))
700 /* Not enough flops to bother counting */
701 set_pbc(&pbc_full
, fr
->ePBC
, box
);
703 calc_listed(cr
, ms
, wcycle
, idef
, x
, hist
,
706 graph
, enerd
, nrnb
, lambda
, md
, fcd
,
707 global_atom_index
, stepWork
);
709 /* Check if we have to determine energy differences
710 * at foreign lambda's.
712 if (fepvals
->n_lambda
> 0 && stepWork
.computeDhdl
)
714 posres_wrapper_lambda(wcycle
, fepvals
, idef
, &pbc_full
, x
, enerd
, lambda
, fr
);
716 if (idef
->ilsort
!= ilsortNO_FE
)
718 wallcycle_sub_start(wcycle
, ewcsLISTED_FEP
);
719 if (idef
->ilsort
!= ilsortFE_SORTED
)
721 gmx_incons("The bonded interactions are not sorted for free energy");
723 for (size_t i
= 0; i
< enerd
->enerpart_lambda
.size(); i
++)
727 reset_foreign_enerdata(enerd
);
728 for (int j
= 0; j
< efptNR
; j
++)
730 lam_i
[j
] = (i
== 0 ? lambda
[j
] : fepvals
->all_lambda
[j
][i
-1]);
732 calc_listed_lambda(idef
, x
, fr
, pbc
, graph
, &(enerd
->foreign_grpp
), enerd
->foreign_term
, nrnb
, lam_i
, md
,
733 fcd
, global_atom_index
);
734 sum_epot(&(enerd
->foreign_grpp
), enerd
->foreign_term
);
735 enerd
->enerpart_lambda
[i
] += enerd
->foreign_term
[F_EPOT
];
737 wallcycle_sub_stop(wcycle
, ewcsLISTED_FEP
);