/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/ewald/long_range_correction.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/listed_forces.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vecdump.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/forcerec_threading.h"
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/rf_util.h"
#include "gromacs/mdlib/wall.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
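
/*! \brief Zero the energy, dV/dlambda and virial accumulators of a
 * thread-local Ewald correction output struct. */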
static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t)
{
    ewc_t->Vcorr_q        = 0;
    ewc_t->Vcorr_lj       = 0;
    ewc_t->dvdl[efptCOUL] = 0;
    ewc_t->dvdl[efptVDW]  = 0;
    clear_mat(ewc_t->vir_q);
    clear_mat(ewc_t->vir_lj);
}
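
/*! \brief Accumulate the per-thread Ewald correction outputs
 * ewc_t[1..nthreads-1] into ewc_t[0]. */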
static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t)
{
    ewald_corr_thread_t &dest = ewc_t[0];

    for (int t = 1; t < nthreads; t++)
    {
        dest.Vcorr_q        += ewc_t[t].Vcorr_q;
        dest.Vcorr_lj       += ewc_t[t].Vcorr_lj;
        dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
        dest.dvdl[efptVDW]  += ewc_t[t].dvdl[efptVDW];
        m_add(dest.vir_q,  ewc_t[t].vir_q,  dest.vir_q);
        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
    }
}
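
/*! \brief Compute the lower-level, CPU-side force contributions: walls,
 * listed (bonded) interactions, and the long-range part of Ewald/PME
 * electrostatics and LJ-PME, accumulating forces, energies, virials and
 * dV/dlambda into \p forceOutputs and \p enerd. */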
void do_force_lowlevel(t_forcerec                          *fr,
                       const t_inputrec                    *ir,
                       const t_idef                        *idef,
                       const t_commrec                     *cr,
                       const gmx_multisim_t                *ms,
                       t_nrnb                              *nrnb,
                       gmx_wallcycle_t                      wcycle,
                       const t_mdatoms                     *md,
                       gmx::ArrayRefWithPadding<gmx::RVec>  coordinates,
                       history_t                           *hist,
                       gmx::ForceOutputs                   *forceOutputs,
                       gmx_enerdata_t                      *enerd,
                       t_fcdata                            *fcd,
                       const matrix                         box,
                       const real                          *lambda,
                       const t_graph                       *graph,
                       const rvec                          *mu_tot,
                       int                                  flags,
                       const DDBalanceRegionHandler        &ddBalanceRegionHandler)
{
    // TODO: Replace all uses of x by const coordinates
    rvec *x = as_rvec_array(coordinates.paddedArrayRef().data());

    // TODO: Add the shift forces to forceOutputs
    rvec *forceForUseWithShiftForces = forceOutputs->f();
    auto &forceWithVirial            = forceOutputs->forceWithVirial();
    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr);
    }
    /* Call the short range functions all in one go. */

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(*ir, *fr, box, *md, x,
                                   &forceWithVirial, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR].data(), nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }
    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    t_pbc pbc;

    /* Check whether we need to take into account PBC in listed interactions. */
    const auto needPbcForListedForces = fr->bMolPBC && bool(flags & GMX_FORCE_LISTED) && haveCpuListedForces(*fr, *idef, *fcd);
    if (needPbcForListedForces)
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr,
                   TRUE, box);
    }

    do_force_listed(wcycle, box, ir->fepvals, cr, ms,
                    idef, x, hist,
                    forceForUseWithShiftForces, &forceWithVirial,
                    fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
                    flags);
    const bool computePmeOnCpu =
        (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) &&
        thisRankHasDuty(cr, DUTY_PME) &&
        (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);

    const bool haveEwaldSurfaceTerms =
        EEL_PME_EWALD(fr->ic->eeltype) &&
        (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0);
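
    /* The Ewald surface/dipole correction is computed on the CPU in this
     * function even when the PME mesh part runs on a GPU or on separate
     * PME ranks, hence the separate condition below. */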
    /* Do long-range electrostatics and/or LJ-PME
     * and compute PME surface terms when necessary.
     */
    if (computePmeOnCpu ||
        fr->ic->eeltype == eelEWALD ||
        haveEwaldSurfaceTerms)
    {
        real Vlr_q = 0, Vlr_lj = 0;

        /* We reduce all virial, dV/dlambda and energy contributions, except
         * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
         */
        ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0];
        clearEwaldThreadOutput(&ewaldOutput);

        if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
        {
            /* Calculate Ewald surface terms, when necessary */
            if (haveEwaldSurfaceTerms)
            {
                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                int nthreads = fr->nthread_ewc;
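                /* Each thread accumulates its corrections into its own
                 * ewald_corr_thread_t entry; the entries are summed into
                 * fr->ewc_t[0] by reduceEwaldThreadOuput() below. */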
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (int t = 0; t < nthreads; t++)
                {
                    try
                    {
                        ewald_corr_thread_t &ewc_t = fr->ewc_t[t];
                        if (t > 0)
                        {
                            clearEwaldThreadOutput(&ewc_t);
                        }

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single forceWithVirial->force_ array.
                         */
                        ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir,
                                           md->chargeA, md->chargeB,
                                           (md->nChargePerturbed != 0),
                                           x, box, mu_tot,
                                           as_rvec_array(forceWithVirial.force_.data()),
                                           &ewc_t.Vcorr_q,
                                           lambda[efptCOUL],
                                           &ewc_t.dvdl[efptCOUL]);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                }
                reduceEwaldThreadOuput(nthreads, fr->ewc_t);
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                ewaldOutput.Vcorr_q +=
                    ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                            &ewaldOutput.dvdl[efptCOUL],
                                            ewaldOutput.vir_q);
            }
        }

        if (computePmeOnCpu)
        {
            /* Do reciprocal PME for Coulomb and/or LJ. */
            assert(fr->n_tpi >= 0);
            if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
            {
                int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
                int status    = 0;

                if (flags & GMX_FORCE_FORCES)
                {
                    pme_flags |= GMX_PME_CALC_F;
                }
                if (flags & GMX_FORCE_VIRIAL)
                {
                    pme_flags |= GMX_PME_CALC_ENER_VIR;
                }
                if (fr->n_tpi > 0)
                {
                    /* We don't calculate f, but we do want the potential */
                    pme_flags |= GMX_PME_CALC_POT;
                }

                /* With domain decomposition we close the CPU side load
                 * balancing region here, because PME does global
                 * communication that acts as a global barrier.
                 */
                ddBalanceRegionHandler.closeAfterForceComputationCpu();

                wallcycle_start(wcycle, ewcPMEMESH);
                status = gmx_pme_do(fr->pmedata,
                                    gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(), md->homenr - fr->n_tpi),
                                    forceWithVirial.force_,
                                    md->chargeA, md->chargeB,
                                    md->sqrt_c6A, md->sqrt_c6B,
                                    md->sigmaA, md->sigmaB,
                                    box, cr,
                                    DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                    DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                    nrnb, wcycle,
                                    ewaldOutput.vir_q, ewaldOutput.vir_lj,
                                    &Vlr_q, &Vlr_lj,
                                    lambda[efptCOUL], lambda[efptVDW],
                                    &ewaldOutput.dvdl[efptCOUL],
                                    &ewaldOutput.dvdl[efptVDW],
                                    pme_flags);
                wallcycle_stop(wcycle, ewcPMEMESH);
                if (status != 0)
                {
                    gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                }

                /* We should try to do as little computation after
                 * this as possible, because parallel PME synchronizes
                 * the nodes, so we want all load imbalance of the
                 * rest of the force calculation to be before the PME
                 * call.  DD load balancing is done on the whole time
                 * of the force call (without PME).
                 */
            }
            if (fr->n_tpi > 0)
            {
                if (EVDW_PME(ir->vdwtype))
                {
                    gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
                }
                /* Determine the PME grid energy of the test molecule
                 * with the PME grid potential of the other charges.
                 */
                gmx_pme_calc_energy(fr->pmedata,
                                    coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
                                    gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
                                    &Vlr_q);
            }
        }

        if (fr->ic->eeltype == eelEWALD)
        {
            Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()),
                             md->chargeA, md->chargeB,
                             box, cr, md->homenr,
                             ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
                             lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL],
                             fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        // TODO it would be simpler if we just accumulated a single
        // long-range virial contribution.
        forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
        forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
        enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
        enerd->dvdl_lin[efptVDW]  += ewaldOutput.dvdl[efptVDW];
        enerd->term[F_COUL_RECIP]  = Vlr_q + ewaldOutput.Vcorr_q;
        enerd->term[F_LJ_RECIP]    = Vlr_lj + ewaldOutput.Vcorr_lj;

        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
                    Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
                    Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
        }
    }

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }
}