2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief This file defines high-level functions for mdrun to compute
38 * energies and forces for listed interactions.
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed-forces
46 #include "listed-forces.h"
52 #include "gromacs/gmxlib/network.h"
53 #include "gromacs/gmxlib/nrnb.h"
54 #include "gromacs/listed-forces/bonded.h"
55 #include "gromacs/listed-forces/disre.h"
56 #include "gromacs/listed-forces/orires.h"
57 #include "gromacs/listed-forces/pairs.h"
58 #include "gromacs/listed-forces/position-restraints.h"
59 #include "gromacs/math/vec.h"
60 #include "gromacs/mdlib/force.h"
61 #include "gromacs/mdlib/force_flags.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/fcdata.h"
64 #include "gromacs/mdtypes/forcerec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/pbcutil/ishift.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/simd/simd.h"
70 #include "gromacs/timing/wallcycle.h"
71 #include "gromacs/topology/topology.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/smalloc.h"
76 #include "listed-internal.h"
81 /*! \brief Return true if ftype is an explicit pair-listed LJ or
82 * COULOMB interaction type: bonded LJ (usually 1-4), or special
83 * listed non-bonded for FEP. */
85 isPairInteraction(int ftype
)
87 return ((ftype
) >= F_LJ14
&& (ftype
) <= F_LJC_PAIRS_NB
);
90 /*! \brief Zero thread-local output buffers */
92 zero_thread_output(struct bonded_threading_t
*bt
, int thread
)
99 f_thread_t
*f_t
= &bt
->f_t
[thread
];
100 const int nelem_fa
= sizeof(*f_t
->f
)/sizeof(real
);
102 for (int i
= 0; i
< f_t
->nblock_used
; i
++)
104 int a0
= f_t
->block_index
[i
]*reduction_block_size
;
105 int a1
= a0
+ reduction_block_size
;
106 for (int a
= a0
; a
< a1
; a
++)
108 for (int d
= 0; d
< nelem_fa
; d
++)
115 for (int i
= 0; i
< SHIFTS
; i
++)
117 clear_rvec(f_t
->fshift
[i
]);
119 for (int i
= 0; i
< F_NRE
; i
++)
123 for (int i
= 0; i
< egNR
; i
++)
125 for (int j
= 0; j
< f_t
->grpp
.nener
; j
++)
127 f_t
->grpp
.ener
[i
][j
] = 0;
130 for (int i
= 0; i
< efptNR
; i
++)
136 /*! \brief The max thread number is arbitrary, we used a fixed number
137 * to avoid memory management. Using more than 16 threads is probably
138 * never useful performance wise. */
139 #define MAX_BONDED_THREADS 256
141 /*! \brief Reduce thread-local force buffers */
143 reduce_thread_forces(int n
, rvec
*f
,
144 struct bonded_threading_t
*bt
,
147 if (nthreads
> MAX_BONDED_THREADS
)
149 gmx_fatal(FARGS
, "Can not reduce bonded forces on more than %d threads",
153 /* This reduction can run on any number of threads,
154 * independently of bt->nthreads.
155 * But if nthreads matches bt->nthreads (which it currently does)
156 * the uniform distribution of the touched blocks over nthreads will
157 * match the distribution of bonded over threads well in most cases,
158 * which means that threads mostly reduce their own data which increases
159 * the number of cache hits.
161 #pragma omp parallel for num_threads(nthreads) schedule(static)
162 for (int b
= 0; b
< bt
->nblock_used
; b
++)
166 int ind
= bt
->block_index
[b
];
167 rvec4
*fp
[MAX_BONDED_THREADS
];
169 /* Determine which threads contribute to this block */
171 for (int ft
= 0; ft
< bt
->nthreads
; ft
++)
173 if (bitmask_is_set(bt
->mask
[ind
], ft
))
175 fp
[nfb
++] = bt
->f_t
[ft
].f
;
180 /* Reduce force buffers for threads that contribute */
181 int a0
= ind
*reduction_block_size
;
182 int a1
= (ind
+ 1)*reduction_block_size
;
183 /* It would be nice if we could pad f to avoid this min */
184 a1
= std::min(a1
, n
);
185 for (int a
= a0
; a
< a1
; a
++)
187 for (int fb
= 0; fb
< nfb
; fb
++)
189 rvec_inc(f
[a
], fp
[fb
][a
]);
194 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
198 /*! \brief Reduce thread-local forces, shift forces and energies */
200 reduce_thread_output(int n
, rvec
*f
, rvec
*fshift
,
201 real
*ener
, gmx_grppairener_t
*grpp
, real
*dvdl
,
202 struct bonded_threading_t
*bt
,
203 gmx_bool bCalcEnerVir
,
206 assert(bt
->haveBondeds
);
208 if (bt
->nblock_used
> 0)
210 /* Reduce the bonded force buffer */
211 reduce_thread_forces(n
, f
, bt
, bt
->nthreads
);
214 /* When necessary, reduce energy and virial using one thread only */
215 if (bCalcEnerVir
&& bt
->nthreads
> 1)
217 f_thread_t
*f_t
= bt
->f_t
;
219 for (int i
= 0; i
< SHIFTS
; i
++)
221 for (int t
= 1; t
< bt
->nthreads
; t
++)
223 rvec_inc(fshift
[i
], f_t
[t
].fshift
[i
]);
226 for (int i
= 0; i
< F_NRE
; i
++)
228 for (int t
= 1; t
< bt
->nthreads
; t
++)
230 ener
[i
] += f_t
[t
].ener
[i
];
233 for (int i
= 0; i
< egNR
; i
++)
235 for (int j
= 0; j
< f_t
[1].grpp
.nener
; j
++)
237 for (int t
= 1; t
< bt
->nthreads
; t
++)
239 grpp
->ener
[i
][j
] += f_t
[t
].grpp
.ener
[i
][j
];
245 for (int i
= 0; i
< efptNR
; i
++)
248 for (int t
= 1; t
< bt
->nthreads
; t
++)
250 dvdl
[i
] += f_t
[t
].dvdl
[i
];
257 /*! \brief Calculate one element of the list of bonded interactions
260 calc_one_bond(int thread
,
261 int ftype
, const t_idef
*idef
,
262 const rvec x
[], rvec4 f
[], rvec fshift
[],
263 const t_forcerec
*fr
,
264 const t_pbc
*pbc
, const t_graph
*g
,
265 gmx_grppairener_t
*grpp
,
267 const real
*lambda
, real
*dvdl
,
268 const t_mdatoms
*md
, t_fcdata
*fcd
,
269 gmx_bool bCalcEnerVir
,
270 int *global_atom_index
)
272 #if GMX_SIMD_HAVE_REAL
273 bool bUseSIMD
= fr
->use_simd_kernels
;
276 int nat1
, nbonds
, efptFTYPE
;
281 if (IS_RESTRAINT_TYPE(ftype
))
283 efptFTYPE
= efptRESTRAINT
;
287 efptFTYPE
= efptBONDED
;
290 GMX_ASSERT(fr
->efep
== efepNO
|| idef
->ilsort
== ilsortNO_FE
|| idef
->ilsort
== ilsortFE_SORTED
, "With free-energy calculations, we should either have no perturbed bondeds or sorted perturbed bondeds");
291 const bool useFreeEnergy
= (idef
->ilsort
== ilsortFE_SORTED
&& idef
->il
[ftype
].nr_nonperturbed
< idef
->il
[ftype
].nr
);
292 const bool computeForcesOnly
= (!bCalcEnerVir
&& !useFreeEnergy
);
294 nat1
= interaction_function
[ftype
].nratoms
+ 1;
295 nbonds
= idef
->il
[ftype
].nr
/nat1
;
296 iatoms
= idef
->il
[ftype
].iatoms
;
298 nb0
= idef
->il_thread_division
[ftype
*(idef
->nthreads
+1)+thread
];
299 nbn
= idef
->il_thread_division
[ftype
*(idef
->nthreads
+1)+thread
+1] - nb0
;
301 if (!isPairInteraction(ftype
))
305 /* TODO The execution time for CMAP dihedrals might be
306 nice to account to its own subtimer, but first
307 wallcycle needs to be extended to support calling from
309 v
= cmap_dihs(nbn
, iatoms
+nb0
,
310 idef
->iparams
, &idef
->cmap_grid
,
312 pbc
, g
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]),
313 md
, fcd
, global_atom_index
);
315 #if GMX_SIMD_HAVE_REAL
316 else if (ftype
== F_ANGLES
&& bUseSIMD
&& computeForcesOnly
)
318 /* No energies, shift forces, dvdl */
319 angles_noener_simd(nbn
, idef
->il
[ftype
].iatoms
+nb0
,
322 pbc
, g
, lambda
[efptFTYPE
], md
, fcd
,
327 else if (ftype
== F_UREY_BRADLEY
&& bUseSIMD
&& computeForcesOnly
)
329 /* No energies, shift forces, dvdl */
330 urey_bradley_noener_simd(nbn
, idef
->il
[ftype
].iatoms
+nb0
,
333 pbc
, g
, lambda
[efptFTYPE
], md
, fcd
,
338 else if (ftype
== F_PDIHS
&& computeForcesOnly
)
340 /* No energies, shift forces, dvdl */
341 #if GMX_SIMD_HAVE_REAL
344 pdihs_noener_simd(nbn
, idef
->il
[ftype
].iatoms
+nb0
,
347 pbc
, g
, lambda
[efptFTYPE
], md
, fcd
,
353 pdihs_noener(nbn
, idef
->il
[ftype
].iatoms
+nb0
,
356 pbc
, g
, lambda
[efptFTYPE
], md
, fcd
,
361 #if GMX_SIMD_HAVE_REAL
362 else if (ftype
== F_RBDIHS
&& bUseSIMD
&& computeForcesOnly
)
364 /* No energies, shift forces, dvdl */
365 rbdihs_noener_simd(nbn
, idef
->il
[ftype
].iatoms
+nb0
,
368 pbc
, g
, lambda
[efptFTYPE
], md
, fcd
,
375 v
= interaction_function
[ftype
].ifunc(nbn
, iatoms
+nb0
,
378 pbc
, g
, lambda
[efptFTYPE
], &(dvdl
[efptFTYPE
]),
379 md
, fcd
, global_atom_index
);
384 /* TODO The execution time for pairs might be nice to account
385 to its own subtimer, but first wallcycle needs to be
386 extended to support calling from multiple threads. */
387 do_pairs(ftype
, nbn
, iatoms
+nb0
, idef
->iparams
, x
, f
, fshift
,
388 pbc
, g
, lambda
, dvdl
, md
, fr
,
389 bCalcEnerVir
, grpp
, global_atom_index
);
395 inc_nrnb(nrnb
, interaction_function
[ftype
].nrnb_ind
, nbonds
);
404 ftype_is_bonded_potential(int ftype
)
407 (interaction_function
[ftype
].flags
& IF_BOND
) &&
408 !(ftype
== F_CONNBONDS
|| ftype
== F_POSRES
|| ftype
== F_FBPOSRES
);
411 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
414 calcBondedForces(const t_idef
*idef
,
416 const t_forcerec
*fr
,
417 const t_pbc
*pbc_null
,
419 gmx_enerdata_t
*enerd
,
425 gmx_bool bCalcEnerVir
,
426 int *global_atom_index
)
428 struct bonded_threading_t
*bt
= fr
->bonded_threading
;
430 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
431 for (int thread
= 0; thread
< bt
->nthreads
; thread
++)
441 gmx_grppairener_t
*grpp
;
443 zero_thread_output(bt
, thread
);
445 ft
= bt
->f_t
[thread
].f
;
456 fshift
= bt
->f_t
[thread
].fshift
;
457 epot
= bt
->f_t
[thread
].ener
;
458 grpp
= &bt
->f_t
[thread
].grpp
;
459 dvdlt
= bt
->f_t
[thread
].dvdl
;
461 /* Loop over all bonded force types to calculate the bonded forces */
462 for (ftype
= 0; (ftype
< F_NRE
); ftype
++)
464 if (idef
->il
[ftype
].nr
> 0 && ftype_is_bonded_potential(ftype
))
466 v
= calc_one_bond(thread
, ftype
, idef
, x
,
467 ft
, fshift
, fr
, pbc_null
, g
, grpp
,
469 md
, fcd
, bCalcEnerVir
,
475 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
;
479 void calc_listed(const t_commrec
*cr
,
480 const gmx_multisim_t
*ms
,
481 struct gmx_wallcycle
*wcycle
,
483 const rvec x
[], history_t
*hist
,
485 gmx::ForceWithVirial
*forceWithVirial
,
486 const t_forcerec
*fr
,
487 const struct t_pbc
*pbc
,
488 const struct t_pbc
*pbc_full
,
489 const struct t_graph
*g
,
490 gmx_enerdata_t
*enerd
, t_nrnb
*nrnb
,
493 t_fcdata
*fcd
, int *global_atom_index
,
496 struct bonded_threading_t
*bt
;
497 gmx_bool bCalcEnerVir
;
498 const t_pbc
*pbc_null
;
500 bt
= fr
->bonded_threading
;
502 assert(bt
->nthreads
== idef
->nthreads
);
504 bCalcEnerVir
= (force_flags
& (GMX_FORCE_VIRIAL
| GMX_FORCE_ENERGY
));
515 if ((idef
->il
[F_POSRES
].nr
> 0) ||
516 (idef
->il
[F_FBPOSRES
].nr
> 0) ||
517 fcd
->orires
.nr
> 0 ||
518 fcd
->disres
.nres
> 0)
520 /* TODO Use of restraints triggers further function calls
521 inside the loop over calc_one_bond(), but those are too
522 awkward to account to this subtimer properly in the present
523 code. We don't test / care much about performance with
524 restraints, anyway. */
525 wallcycle_sub_start(wcycle
, ewcsRESTRAINTS
);
527 if (idef
->il
[F_POSRES
].nr
> 0)
529 posres_wrapper(nrnb
, idef
, pbc_full
, x
, enerd
, lambda
, fr
,
533 if (idef
->il
[F_FBPOSRES
].nr
> 0)
535 fbposres_wrapper(nrnb
, idef
, pbc_full
, x
, enerd
, fr
,
539 /* Do pre force calculation stuff which might require communication */
540 if (fcd
->orires
.nr
> 0)
542 /* This assertion is to ensure we have whole molecules.
543 * Unfortunately we do not have an mdrun state variable that tells
544 * us if molecules in x are not broken over PBC, so we have to make
545 * do with checking graph!=nullptr, which should tell us if we made
546 * molecules whole before calling the current function.
548 GMX_RELEASE_ASSERT(fr
->ePBC
== epbcNONE
|| g
!= nullptr, "With orientation restraints molecules should be whole");
549 enerd
->term
[F_ORIRESDEV
] =
550 calc_orires_dev(ms
, idef
->il
[F_ORIRES
].nr
,
551 idef
->il
[F_ORIRES
].iatoms
,
552 idef
->iparams
, md
, x
,
553 pbc_null
, fcd
, hist
);
555 if (fcd
->disres
.nres
> 0)
557 calc_disres_R_6(cr
, ms
,
558 idef
->il
[F_DISRES
].nr
,
559 idef
->il
[F_DISRES
].iatoms
,
564 wallcycle_sub_stop(wcycle
, ewcsRESTRAINTS
);
569 wallcycle_sub_start(wcycle
, ewcsLISTED
);
570 /* The dummy array is to have a place to store the dhdl at other values
571 of lambda, which will be thrown away in the end */
572 real dvdl
[efptNR
] = {0};
573 calcBondedForces(idef
, x
, fr
, pbc_null
, g
, enerd
, nrnb
, lambda
, dvdl
, md
,
574 fcd
, bCalcEnerVir
, global_atom_index
);
575 wallcycle_sub_stop(wcycle
, ewcsLISTED
);
577 wallcycle_sub_start(wcycle
, ewcsLISTED_BUF_OPS
);
578 reduce_thread_output(fr
->natoms_force
, f
, fr
->fshift
,
579 enerd
->term
, &enerd
->grpp
, dvdl
,
582 force_flags
& GMX_FORCE_DHDL
);
584 if (force_flags
& GMX_FORCE_DHDL
)
586 for (int i
= 0; i
< efptNR
; i
++)
588 enerd
->dvdl_nonlin
[i
] += dvdl
[i
];
591 wallcycle_sub_stop(wcycle
, ewcsLISTED_BUF_OPS
);
594 /* Copy the sum of violations for the distance restraints from fcd */
597 enerd
->term
[F_DISRESVIOL
] = fcd
->disres
.sumviol
;
601 void calc_listed_lambda(const t_idef
*idef
,
603 const t_forcerec
*fr
,
604 const struct t_pbc
*pbc
, const struct t_graph
*g
,
605 gmx_grppairener_t
*grpp
, real
*epot
, t_nrnb
*nrnb
,
609 int *global_atom_index
)
611 int ftype
, nr_nonperturbed
, nr
;
613 real dvdl_dum
[efptNR
] = {0};
616 const t_pbc
*pbc_null
;
628 /* Copy the whole idef, so we can modify the contents locally */
630 idef_fe
.nthreads
= 1;
631 snew(idef_fe
.il_thread_division
, F_NRE
*(idef_fe
.nthreads
+1));
633 /* We already have the forces, so we use temp buffers here */
634 snew(f
, fr
->natoms_force
);
635 snew(fshift
, SHIFTS
);
637 /* Loop over all bonded force types to calculate the bonded energies */
638 for (ftype
= 0; (ftype
< F_NRE
); ftype
++)
640 if (ftype_is_bonded_potential(ftype
))
642 /* Set the work range of thread 0 to the perturbed bondeds only */
643 nr_nonperturbed
= idef
->il
[ftype
].nr_nonperturbed
;
644 nr
= idef
->il
[ftype
].nr
;
645 idef_fe
.il_thread_division
[ftype
*2+0] = nr_nonperturbed
;
646 idef_fe
.il_thread_division
[ftype
*2+1] = nr
;
648 /* This is only to get the flop count correct */
649 idef_fe
.il
[ftype
].nr
= nr
- nr_nonperturbed
;
651 if (nr
- nr_nonperturbed
> 0)
653 v
= calc_one_bond(0, ftype
, &idef_fe
,
654 x
, f
, fshift
, fr
, pbc_null
, g
,
655 grpp
, nrnb
, lambda
, dvdl_dum
,
666 sfree(idef_fe
.il_thread_division
);
670 do_force_listed(struct gmx_wallcycle
*wcycle
,
672 const t_lambda
*fepvals
,
674 const gmx_multisim_t
*ms
,
678 rvec
*forceForUseWithShiftForces
,
679 gmx::ForceWithVirial
*forceWithVirial
,
680 const t_forcerec
*fr
,
681 const struct t_pbc
*pbc
,
682 const struct t_graph
*graph
,
683 gmx_enerdata_t
*enerd
,
688 int *global_atom_index
,
691 t_pbc pbc_full
; /* Full PBC is needed for position restraints */
693 if (!(flags
& GMX_FORCE_LISTED
))
698 if ((idef
->il
[F_POSRES
].nr
> 0) ||
699 (idef
->il
[F_FBPOSRES
].nr
> 0))
701 /* Not enough flops to bother counting */
702 set_pbc(&pbc_full
, fr
->ePBC
, box
);
704 calc_listed(cr
, ms
, wcycle
, idef
, x
, hist
,
705 forceForUseWithShiftForces
, forceWithVirial
,
707 graph
, enerd
, nrnb
, lambda
, md
, fcd
,
708 global_atom_index
, flags
);
710 /* Check if we have to determine energy differences
711 * at foreign lambda's.
713 if (fepvals
->n_lambda
> 0 && (flags
& GMX_FORCE_DHDL
))
715 posres_wrapper_lambda(wcycle
, fepvals
, idef
, &pbc_full
, x
, enerd
, lambda
, fr
);
717 if (idef
->ilsort
!= ilsortNO_FE
)
719 wallcycle_sub_start(wcycle
, ewcsLISTED_FEP
);
720 if (idef
->ilsort
!= ilsortFE_SORTED
)
722 gmx_incons("The bonded interactions are not sorted for free energy");
724 for (int i
= 0; i
< enerd
->n_lambda
; i
++)
728 reset_foreign_enerdata(enerd
);
729 for (int j
= 0; j
< efptNR
; j
++)
731 lam_i
[j
] = (i
== 0 ? lambda
[j
] : fepvals
->all_lambda
[j
][i
-1]);
733 calc_listed_lambda(idef
, x
, fr
, pbc
, graph
, &(enerd
->foreign_grpp
), enerd
->foreign_term
, nrnb
, lam_i
, md
,
734 fcd
, global_atom_index
);
735 sum_epot(&(enerd
->foreign_grpp
), enerd
->foreign_term
);
736 enerd
->enerpart_lambda
[i
] += enerd
->foreign_term
[F_EPOT
];
738 wallcycle_sub_stop(wcycle
, ewcsLISTED_FEP
);