Explicitly destroy PME-PP GPU communication object
[gromacs.git] / src / gromacs / mdrun / md.cpp
blob9e1d12484992f649f978ec8d4bb317fa0695e2d9
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \internal \file
39 * \brief Implements the integrator for normal molecular dynamics simulations
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
44 #include "gmxpre.h"
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
51 #include <algorithm>
52 #include <memory>
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/ewald/pme_pp_comm_gpu.h"
67 #include "gromacs/fileio/trxio.h"
68 #include "gromacs/gmxlib/network.h"
69 #include "gromacs/gmxlib/nrnb.h"
70 #include "gromacs/gpu_utils/gpu_utils.h"
71 #include "gromacs/imd/imd.h"
72 #include "gromacs/listed_forces/manage_threading.h"
73 #include "gromacs/math/functions.h"
74 #include "gromacs/math/utilities.h"
75 #include "gromacs/math/vec.h"
76 #include "gromacs/math/vectypes.h"
77 #include "gromacs/mdlib/checkpointhandler.h"
78 #include "gromacs/mdlib/compute_io.h"
79 #include "gromacs/mdlib/constr.h"
80 #include "gromacs/mdlib/ebin.h"
81 #include "gromacs/mdlib/enerdata_utils.h"
82 #include "gromacs/mdlib/energyoutput.h"
83 #include "gromacs/mdlib/expanded.h"
84 #include "gromacs/mdlib/force.h"
85 #include "gromacs/mdlib/force_flags.h"
86 #include "gromacs/mdlib/forcerec.h"
87 #include "gromacs/mdlib/md_support.h"
88 #include "gromacs/mdlib/mdatoms.h"
89 #include "gromacs/mdlib/mdoutf.h"
90 #include "gromacs/mdlib/membed.h"
91 #include "gromacs/mdlib/resethandler.h"
92 #include "gromacs/mdlib/sighandler.h"
93 #include "gromacs/mdlib/simulationsignal.h"
94 #include "gromacs/mdlib/stat.h"
95 #include "gromacs/mdlib/stophandler.h"
96 #include "gromacs/mdlib/tgroup.h"
97 #include "gromacs/mdlib/trajectory_writing.h"
98 #include "gromacs/mdlib/update.h"
99 #include "gromacs/mdlib/update_constrain_cuda.h"
100 #include "gromacs/mdlib/vcm.h"
101 #include "gromacs/mdlib/vsite.h"
102 #include "gromacs/mdrunutility/handlerestart.h"
103 #include "gromacs/mdrunutility/multisim.h"
104 #include "gromacs/mdrunutility/printtime.h"
105 #include "gromacs/mdtypes/awh_history.h"
106 #include "gromacs/mdtypes/awh_params.h"
107 #include "gromacs/mdtypes/commrec.h"
108 #include "gromacs/mdtypes/df_history.h"
109 #include "gromacs/mdtypes/energyhistory.h"
110 #include "gromacs/mdtypes/fcdata.h"
111 #include "gromacs/mdtypes/forcerec.h"
112 #include "gromacs/mdtypes/group.h"
113 #include "gromacs/mdtypes/inputrec.h"
114 #include "gromacs/mdtypes/interaction_const.h"
115 #include "gromacs/mdtypes/md_enums.h"
116 #include "gromacs/mdtypes/mdatom.h"
117 #include "gromacs/mdtypes/mdrunoptions.h"
118 #include "gromacs/mdtypes/observableshistory.h"
119 #include "gromacs/mdtypes/pullhistory.h"
120 #include "gromacs/mdtypes/simulation_workload.h"
121 #include "gromacs/mdtypes/state.h"
122 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
123 #include "gromacs/modularsimulator/energyelement.h"
124 #include "gromacs/nbnxm/gpu_data_mgmt.h"
125 #include "gromacs/nbnxm/nbnxm.h"
126 #include "gromacs/pbcutil/mshift.h"
127 #include "gromacs/pbcutil/pbc.h"
128 #include "gromacs/pulling/output.h"
129 #include "gromacs/pulling/pull.h"
130 #include "gromacs/swap/swapcoords.h"
131 #include "gromacs/timing/wallcycle.h"
132 #include "gromacs/timing/walltime_accounting.h"
133 #include "gromacs/topology/atoms.h"
134 #include "gromacs/topology/idef.h"
135 #include "gromacs/topology/mtop_util.h"
136 #include "gromacs/topology/topology.h"
137 #include "gromacs/trajectory/trajectoryframe.h"
138 #include "gromacs/utility/basedefinitions.h"
139 #include "gromacs/utility/cstringutil.h"
140 #include "gromacs/utility/fatalerror.h"
141 #include "gromacs/utility/logger.h"
142 #include "gromacs/utility/real.h"
143 #include "gromacs/utility/smalloc.h"
145 #include "legacysimulator.h"
146 #include "replicaexchange.h"
147 #include "shellfc.h"
149 #if GMX_FAHCORE
150 #include "corewrap.h"
151 #endif
153 using gmx::SimulationSignaller;
155 void gmx::LegacySimulator::do_md()
157 // TODO Historically, the EM and MD "integrators" used different
158 // names for the t_inputrec *parameter, but these must have the
159 // same name, now that it's a member of a struct. We use this ir
160 // alias to avoid a large ripple of nearly useless changes.
161 // t_inputrec is being replaced by IMdpOptionsProvider, so this
162 // will go away eventually.
163 t_inputrec *ir = inputrec;
164 int64_t step, step_rel;
165 double t, t0 = ir->init_t, lam0[efptNR];
166 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
167 gmx_bool bNS, bNStList, bStopCM,
168 bFirstStep, bInitStep, bLastStep = FALSE;
169 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
170 gmx_bool do_ene, do_log, do_verbose;
171 gmx_bool bMasterState;
172 unsigned int force_flags;
173 tensor force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
174 tmp_vir = {{0}}, pres = {{0}};
175 int i, m;
176 rvec mu_tot;
177 matrix parrinellorahmanMu, M;
178 gmx_repl_ex_t repl_ex = nullptr;
179 gmx_localtop_t top;
180 PaddedHostVector<gmx::RVec> f {};
181 gmx_global_stat_t gstat;
182 t_graph *graph = nullptr;
183 gmx_shellfc_t *shellfc;
184 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
185 gmx_bool bTemp, bPres, bTrotter;
186 real dvdl_constr;
187 std::vector<RVec> cbuf;
188 matrix lastbox;
189 int lamnew = 0;
190 /* for FEP */
191 int nstfep = 0;
192 double cycles;
193 real saved_conserved_quantity = 0;
194 real last_ekin = 0;
195 t_extmass MassQ;
196 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
198 /* PME load balancing data for GPU kernels */
199 gmx_bool bPMETune = FALSE;
200 gmx_bool bPMETunePrinting = FALSE;
202 bool bInteractiveMDstep = false;
204 /* Domain decomposition could incorrectly miss a bonded
205 interaction, but checking for that requires a global
206 communication stage, which does not otherwise happen in DD
207 code. So we do that alongside the first global energy reduction
208 after a new DD is made. These variables handle whether the
209 check happens, and the result it returns. */
210 bool shouldCheckNumberOfBondedInteractions = false;
211 int totalNumberOfBondedInteractions = -1;
213 SimulationSignals signals;
214 // Most global communnication stages don't propagate mdrun
215 // signals, and will use this object to achieve that.
216 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
218 if (!mdrunOptions.writeConfout)
220 // This is on by default, and the main known use case for
221 // turning it off is for convenience in benchmarking, which is
222 // something that should not show up in the general user
223 // interface.
224 GMX_LOG(mdlog.info).asParagraph().
225 appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
228 /* md-vv uses averaged full step velocities for T-control
229 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
230 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
231 bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
233 const bool bRerunMD = false;
235 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
236 bGStatEveryStep = (nstglobalcomm == 1);
238 SimulationGroups *groups = &top_global->groups;
240 std::unique_ptr<EssentialDynamics> ed = nullptr;
241 if (opt2bSet("-ei", nfile, fnm))
243 /* Initialize essential dynamics sampling */
244 ed = init_edsam(mdlog,
245 opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
246 top_global,
247 ir, cr, constr,
248 state_global, observablesHistory,
249 oenv,
250 startingBehavior);
252 else if (observablesHistory->edsamHistory)
254 gmx_fatal(FARGS,
255 "The checkpoint is from a run with essential dynamics sampling, "
256 "but the current run did not specify the -ei option. "
257 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
260 initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
261 Update upd(ir, deform);
262 const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
263 if (startingBehavior != StartingBehavior::RestartWithAppending)
265 pleaseCiteCouplingAlgorithms(fplog, *ir);
267 gmx_mdoutf *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, top_global, oenv, wcycle,
268 startingBehavior);
269 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false, mdModulesNotifier);
271 gstat = global_stat_init(ir);
273 /* Check for polarizable models and flexible constraints */
274 shellfc = init_shell_flexcon(fplog,
275 top_global, constr ? constr->numFlexibleConstraints() : 0,
276 ir->nstcalcenergy, DOMAINDECOMP(cr));
279 double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
280 if ((io > 2000) && MASTER(cr))
282 fprintf(stderr,
283 "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
284 io);
288 // Local state only becomes valid now.
289 std::unique_ptr<t_state> stateInstance;
290 t_state * state;
293 auto mdatoms = mdAtoms->mdatoms();
295 std::unique_ptr<UpdateConstrainCuda> integrator;
297 if (DOMAINDECOMP(cr))
299 dd_init_local_top(*top_global, &top);
301 stateInstance = std::make_unique<t_state>();
302 state = stateInstance.get();
303 dd_init_local_state(cr->dd, state_global, state);
305 /* Distribute the charge groups over the nodes from the master node */
306 dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
307 state_global, *top_global, ir, imdSession,
308 pull_work,
309 state, &f, mdAtoms, &top, fr,
310 vsite, constr,
311 nrnb, nullptr, FALSE);
312 shouldCheckNumberOfBondedInteractions = true;
313 upd.setNumAtoms(state->natoms);
315 else
317 state_change_natoms(state_global, state_global->natoms);
318 f.resizeWithPadding(state_global->natoms);
319 /* Copy the pointer to the global state */
320 state = state_global;
322 /* Generate and initialize new topology */
323 mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
324 &graph, mdAtoms, constr, vsite, shellfc);
326 upd.setNumAtoms(state->natoms);
329 /*****************************************************************************************/
330 // TODO: The following block of code should be refactored, once:
331 // 1. We have the useGpuForBufferOps variable set and available here and in do_force(...)
332 // 2. The proper GPU syncronization is introduced, so that the H2D and D2H data copies can be performed in the separate
333 // stream owned by the StatePropagatorDataGpu
334 const auto &simulationWork = runScheduleWork->simulationWork;
335 const bool useGpuForPme = simulationWork.useGpuPme;
336 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
337 // Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
338 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
339 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
342 StatePropagatorDataGpu *stateGpu = fr->stateGpu;
344 if (useGpuForUpdate)
346 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), "Domain decomposition is not supported with the GPU update.\n");
347 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
348 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU to use GPU update.\n");
349 GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only the md integrator is supported with the GPU update.\n");
350 GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
351 GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN, "Only Parrinello-Rahman pressure control is supported with the GPU update.\n");
352 GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported with the GPU update.\n");
353 GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with the GPU update.\n");
354 GMX_RELEASE_ASSERT(!ir->bPull && !ir->pull, "Pulling is not supported with the GPU update.\n");
355 GMX_RELEASE_ASSERT(fcd->orires.nr == 0, "Orientation restraints are not supported with the GPU update.\n");
356 GMX_RELEASE_ASSERT(fcd->disres.npair == 0, "Distance restraints are not supported with the GPU update.\n");
357 GMX_RELEASE_ASSERT(ir->efep == efepNO, "Free energy perturbations are not supported with the GPU update.");
359 if (constr != nullptr && constr->numConstraintsTotal() > 0)
361 GMX_LOG(mdlog.info).asParagraph().
362 appendText("Updating coordinates and applying constraints on the GPU.");
364 else
366 GMX_LOG(mdlog.info).asParagraph().
367 appendText("Updating coordinates on the GPU.");
369 integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
372 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
374 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
376 if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
378 changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
380 if (useGpuForUpdate)
382 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
384 /*****************************************************************************************/
386 // NOTE: The global state is no longer used at this point.
387 // But state_global is still used as temporary storage space for writing
388 // the global state to file and potentially for replica exchange.
389 // (Global topology should persist.)
391 update_mdatoms(mdatoms, state->lambda[efptMASS]);
393 if (ir->bExpanded)
395 /* Check nstexpanded here, because the grompp check was broken */
396 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
398 gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
400 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
401 ir, state->dfhist);
404 if (MASTER(cr))
406 EnergyElement::initializeEnergyHistory(
407 startingBehavior, observablesHistory, &energyOutput);
410 preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
411 startingBehavior != StartingBehavior::NewSimulation);
413 // TODO: Remove this by converting AWH into a ForceProvider
414 auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
415 shellfc != nullptr,
416 opt2fn("-awh", nfile, fnm), pull_work);
418 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
419 if (useReplicaExchange && MASTER(cr))
421 repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
422 replExParams);
424 /* PME tuning is only supported in the Verlet scheme, with PME for
425 * Coulomb. It is not supported with only LJ PME. */
426 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
427 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
429 pme_load_balancing_t *pme_loadbal = nullptr;
430 if (bPMETune)
432 pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
433 *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
434 &bPMETunePrinting);
437 if (!ir->bContinuation)
439 if (state->flags & (1U << estV))
441 auto v = makeArrayRef(state->v);
442 /* Set the velocities of vsites, shells and frozen atoms to zero */
443 for (i = 0; i < mdatoms->homenr; i++)
445 if (mdatoms->ptype[i] == eptVSite ||
446 mdatoms->ptype[i] == eptShell)
448 clear_rvec(v[i]);
450 else if (mdatoms->cFREEZE)
452 for (m = 0; m < DIM; m++)
454 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
456 v[i][m] = 0;
463 if (constr)
465 /* Constrain the initial coordinates and velocities */
466 do_constrain_first(fplog, constr, ir, mdatoms,
467 state->natoms,
468 state->x.arrayRefWithPadding(),
469 state->v.arrayRefWithPadding(),
470 state->box, state->lambda[efptBONDED]);
472 if (vsite)
474 /* Construct the virtual sites for the initial configuration */
475 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
476 top.idef.iparams, top.idef.il,
477 fr->ePBC, fr->bMolPBC, cr, state->box);
481 if (ir->efep != efepNO)
483 /* Set free energy calculation frequency as the greatest common
484 * denominator of nstdhdl and repl_ex_nst. */
485 nstfep = ir->fepvals->nstdhdl;
486 if (ir->bExpanded)
488 nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
490 if (useReplicaExchange)
492 nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
496 /* Be REALLY careful about what flags you set here. You CANNOT assume
497 * this is the first step, since we might be restarting from a checkpoint,
498 * and in that case we should not do any modifications to the state.
500 bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
502 // When restarting from a checkpoint, it can be appropriate to
503 // initialize ekind from quantities in the checkpoint. Otherwise,
504 // compute_globals must initialize ekind before the simulation
505 // starts/restarts. However, only the master rank knows what was
506 // found in the checkpoint file, so we have to communicate in
507 // order to coordinate the restart.
509 // TODO Consider removing this communication if/when checkpoint
510 // reading directly follows .tpr reading, because all ranks can
511 // agree on hasReadEkinState at that time.
512 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
513 if (PAR(cr))
515 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
517 if (hasReadEkinState)
519 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
522 unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
523 | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
524 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
525 | (hasReadEkinState ? CGLO_READEKIN : 0));
527 bSumEkinhOld = FALSE;
529 t_vcm vcm(top_global->groups, *ir);
530 reportComRemovalInfo(fplog, vcm);
532 /* To minimize communication, compute_globals computes the COM velocity
533 * and the kinetic energy for the velocities without COM motion removed.
534 * Thus to get the kinetic energy without the COM contribution, we need
535 * to call compute_globals twice.
537 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
539 unsigned int cglo_flags_iteration = cglo_flags;
540 if (bStopCM && cgloIteration == 0)
542 cglo_flags_iteration |= CGLO_STOPCM;
543 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
545 compute_globals(gstat, cr, ir, fr, ekind,
546 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
547 mdatoms, nrnb, &vcm,
548 nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
549 constr, &nullSignaller, state->box,
550 &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
551 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
552 if (cglo_flags_iteration & CGLO_STOPCM)
554 /* At initialization, do not pass x with acceleration-correction mode
555 * to avoid (incorrect) correction of the initial coordinates.
557 rvec *xPtr = nullptr;
558 if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
560 xPtr = state->x.rvec_array();
562 process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
563 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
566 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
567 top_global, &top, state->x.rvec_array(), state->box,
568 &shouldCheckNumberOfBondedInteractions);
569 if (ir->eI == eiVVAK)
571 /* a second call to get the half step temperature initialized as well */
572 /* we do the same call as above, but turn the pressure off -- internally to
573 compute_globals, this is recognized as a velocity verlet half-step
574 kinetic energy calculation. This minimized excess variables, but
575 perhaps loses some logic?*/
577 compute_globals(gstat, cr, ir, fr, ekind,
578 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
579 mdatoms, nrnb, &vcm,
580 nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
581 constr, &nullSignaller, state->box,
582 nullptr, &bSumEkinhOld,
583 cglo_flags & ~CGLO_PRESSURE);
586 /* Calculate the initial half step temperature, and save the ekinh_old */
587 if (startingBehavior == StartingBehavior::NewSimulation)
589 for (i = 0; (i < ir->opts.ngtc); i++)
591 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
595 /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
596 temperature control */
597 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
599 if (MASTER(cr))
601 if (!ir->bContinuation)
603 if (constr && ir->eConstrAlg == econtLINCS)
605 fprintf(fplog,
606 "RMS relative constraint deviation after constraining: %.2e\n",
607 constr->rmsd());
609 if (EI_STATE_VELOCITY(ir->eI))
611 real temp = enerd->term[F_TEMP];
612 if (ir->eI != eiVV)
614 /* Result of Ekin averaged over velocities of -half
615 * and +half step, while we only have -half step here.
617 temp *= 2;
619 fprintf(fplog, "Initial temperature: %g K\n", temp);
623 char tbuf[20];
624 fprintf(stderr, "starting mdrun '%s'\n",
625 *(top_global->name));
626 if (ir->nsteps >= 0)
628 sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
630 else
632 sprintf(tbuf, "%s", "infinite");
634 if (ir->init_step > 0)
636 fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
637 gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
638 gmx_step_str(ir->init_step, sbuf2),
639 ir->init_step*ir->delta_t);
641 else
643 fprintf(stderr, "%s steps, %s ps.\n",
644 gmx_step_str(ir->nsteps, sbuf), tbuf);
646 fprintf(fplog, "\n");
649 walltime_accounting_start_time(walltime_accounting);
650 wallcycle_start(wcycle, ewcRUN);
651 print_start(fplog, cr, walltime_accounting, "mdrun");
653 #if GMX_FAHCORE
654 /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */
655 int chkpt_ret = fcCheckPointParallel( cr->nodeid,
656 NULL, 0);
657 if (chkpt_ret == 0)
659 gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
661 #endif
663 /***********************************************************
665 * Loop over MD steps
667 ************************************************************/
669 bFirstStep = TRUE;
670 /* Skip the first Nose-Hoover integration when we get the state from tpx */
671 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
672 bSumEkinhOld = FALSE;
673 bExchanged = FALSE;
674 bNeedRepartition = FALSE;
676 bool simulationsShareState = false;
677 int nstSignalComm = nstglobalcomm;
679 // TODO This implementation of ensemble orientation restraints is nasty because
680 // a user can't just do multi-sim with single-sim orientation restraints.
681 bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
682 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
684 // Replica exchange, ensemble restraints and AWH need all
685 // simulations to remain synchronized, so they need
686 // checkpoints and stop conditions to act on the same step, so
687 // the propagation of such signals must take place between
688 // simulations, not just within simulations.
689 // TODO: Make algorithm initializers set these flags.
690 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
692 if (simulationsShareState)
694 // Inter-simulation signal communication does not need to happen
695 // often, so we use a minimum of 200 steps to reduce overhead.
696 const int c_minimumInterSimulationSignallingInterval = 200;
697 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
701 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
702 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
703 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
704 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
706 auto checkpointHandler = std::make_unique<CheckpointHandler>(
707 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
708 simulationsShareState, ir->nstlist == 0, MASTER(cr),
709 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
711 const bool resetCountersIsLocal = true;
712 auto resetHandler = std::make_unique<ResetHandler>(
713 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
714 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
715 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
717 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
719 step = ir->init_step;
720 step_rel = 0;
722 // TODO extract this to new multi-simulation module
723 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
725 if (!multisim_int_all_are_equal(ms, ir->nsteps))
727 GMX_LOG(mdlog.warning).appendText(
728 "Note: The number of steps is not consistent across multi simulations,\n"
729 "but we are proceeding anyway!");
731 if (!multisim_int_all_are_equal(ms, ir->init_step))
733 GMX_LOG(mdlog.warning).appendText(
734 "Note: The initial step is not consistent across multi simulations,\n"
735 "but we are proceeding anyway!");
739 /* and stop now if we should */
740 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
741 while (!bLastStep)
744 /* Determine if this is a neighbor search step */
745 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
747 if (bPMETune && bNStList)
749 // This has to be here because PME load balancing is called so early.
750 // TODO: Move to after all booleans are defined.
751 if (useGpuForUpdate && !bFirstStep)
753 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
754 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
756 /* PME grid + cut-off optimization with GPUs or PME nodes */
757 pme_loadbal_do(pme_loadbal, cr,
758 (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
759 fplog, mdlog,
760 *ir, fr, state->box, state->x,
761 wcycle,
762 step, step_rel,
763 &bPMETunePrinting);
766 wallcycle_start(wcycle, ewcSTEP);
768 bLastStep = (step_rel == ir->nsteps);
769 t = t0 + step*ir->delta_t;
771 // TODO Refactor this, so that nstfep does not need a default value of zero
772 if (ir->efep != efepNO || ir->bSimTemp)
774 /* find and set the current lambdas */
775 setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
777 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
778 bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep));
779 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded)
780 && (ir->bExpanded) && (step > 0) &&
781 (startingBehavior == StartingBehavior::NewSimulation));
784 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
785 do_per_step(step, replExParams.exchangeInterval));
787 if (doSimulatedAnnealing)
789 update_annealing_target_temp(ir, t, &upd);
792 /* Stop Center of Mass motion */
793 bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
795 /* Determine whether or not to do Neighbour Searching */
796 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
798 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
800 /* do_log triggers energy and virial calculation. Because this leads
801 * to different code paths, forces can be different. Thus for exact
802 * continuation we should avoid extra log output.
803 * Note that the || bLastStep can result in non-exact continuation
804 * beyond the last step. But we don't consider that to be an issue.
806 do_log =
807 (do_per_step(step, ir->nstlog) ||
808 (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
809 bLastStep);
810 do_verbose = mdrunOptions.verbose &&
811 (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
813 if (useGpuForUpdate && !bFirstStep)
815 // Copy velocities from the GPU when needed:
816 // - On search steps to keep copy on host (device buffers are reinitialized).
817 // - When needed for the output.
818 if (bNS || do_per_step(step, ir->nstvout))
820 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
821 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
824 // Copy coordinate from the GPU when needed at the search step.
825 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
826 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
827 if (bNS)
829 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
830 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
834 if (bNS && !(bFirstStep && ir->bContinuation))
836 bMasterState = FALSE;
837 /* Correct the new box if it is too skewed */
838 if (inputrecDynamicBox(ir))
840 if (correct_box(fplog, step, state->box, graph))
842 bMasterState = TRUE;
845 if (DOMAINDECOMP(cr) && bMasterState)
847 dd_collect_state(cr->dd, state, state_global);
850 if (DOMAINDECOMP(cr))
852 /* Repartition the domain decomposition */
853 dd_partition_system(fplog, mdlog, step, cr,
854 bMasterState, nstglobalcomm,
855 state_global, *top_global, ir, imdSession,
856 pull_work,
857 state, &f, mdAtoms, &top, fr,
858 vsite, constr,
859 nrnb, wcycle,
860 do_verbose && !bPMETunePrinting);
861 shouldCheckNumberOfBondedInteractions = true;
862 upd.setNumAtoms(state->natoms);
866 if (MASTER(cr) && do_log)
868 energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
871 if (ir->efep != efepNO)
873 update_mdatoms(mdatoms, state->lambda[efptMASS]);
876 if (bExchanged)
879 /* We need the kinetic energy at minus the half step for determining
880 * the full step kinetic energy and possibly for T-coupling.*/
881 /* This may not be quite working correctly yet . . . . */
882 compute_globals(gstat, cr, ir, fr, ekind,
883 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
884 mdatoms, nrnb, &vcm,
885 wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
886 constr, &nullSignaller, state->box,
887 &totalNumberOfBondedInteractions, &bSumEkinhOld,
888 CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
889 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
890 top_global, &top, state->x.rvec_array(), state->box,
891 &shouldCheckNumberOfBondedInteractions);
893 clear_mat(force_vir);
895 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
897 /* Determine the energy and pressure:
898 * at nstcalcenergy steps and at energy output steps (set below).
900 if (EI_VV(ir->eI) && (!bInitStep))
902 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
903 bCalcVir = bCalcEnerStep ||
904 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
906 else
908 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
909 bCalcVir = bCalcEnerStep ||
910 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
912 bCalcEner = bCalcEnerStep;
914 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
916 if (do_ene || do_log || bDoReplEx)
918 bCalcVir = TRUE;
919 bCalcEner = TRUE;
922 /* Do we need global communication ? */
923 bGStat = (bCalcVir || bCalcEner || bStopCM ||
924 do_per_step(step, nstglobalcomm) ||
925 (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
927 force_flags = (GMX_FORCE_STATECHANGED |
928 ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
929 GMX_FORCE_ALLFORCES |
930 (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
931 (bCalcEner ? GMX_FORCE_ENERGY : 0) |
932 (bDoFEP ? GMX_FORCE_DHDL : 0)
935 if (shellfc)
937 /* Now is the time to relax the shells */
938 relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
939 enforcedRotation, step,
940 ir, imdSession, pull_work, bNS, force_flags, &top,
941 constr, enerd, fcd,
942 state->natoms,
943 state->x.arrayRefWithPadding(),
944 state->v.arrayRefWithPadding(),
945 state->box,
946 state->lambda,
947 &state->hist,
948 f.arrayRefWithPadding(), force_vir, mdatoms,
949 nrnb, wcycle, graph,
950 shellfc, fr, runScheduleWork, t, mu_tot,
951 vsite,
952 ddBalanceRegionHandler);
954 else
956 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
957 (or the AWH update will be performed twice for one step when continuing). It would be best to
958 call this update function from do_md_trajectory_writing but that would occur after do_force.
959 One would have to divide the update_awh function into one function applying the AWH force
960 and one doing the AWH bias update. The update AWH bias function could then be called after
961 do_md_trajectory_writing (then containing update_awh_history).
962 The checkpointing will in the future probably moved to the start of the md loop which will
963 rid of this issue. */
964 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
966 awh->updateHistory(state_global->awhHistory.get());
969 /* The coordinates (x) are shifted (to get whole molecules)
970 * in do_force.
971 * This is parallellized as well, and does communication too.
972 * Check comments in sim_util.c
974 do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
975 pull_work,
976 step, nrnb, wcycle, &top,
977 state->box, state->x.arrayRefWithPadding(), &state->hist,
978 f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
979 state->lambda, graph,
980 fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
981 (bNS ? GMX_FORCE_NS : 0) | force_flags,
982 ddBalanceRegionHandler);
985 // VV integrators do not need the following velocity half step
986 // if it is the first step after starting from a checkpoint.
987 // That is, the half step is needed on all other steps, and
988 // also the first step when starting from a .tpr file.
989 if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
990 /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
992 rvec *vbuf = nullptr;
994 wallcycle_start(wcycle, ewcUPDATE);
995 if (ir->eI == eiVV && bInitStep)
997 /* if using velocity verlet with full time step Ekin,
998 * take the first half step only to compute the
999 * virial for the first step. From there,
1000 * revert back to the initial coordinates
1001 * so that the input is actually the initial step.
1003 snew(vbuf, state->natoms);
1004 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
1006 else
1008 /* this is for NHC in the Ekin(t+dt/2) version of vv */
1009 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
1012 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1013 ekind, M, &upd, etrtVELOCITY1,
1014 cr, constr);
1016 wallcycle_stop(wcycle, ewcUPDATE);
1017 constrain_velocities(step, nullptr,
1018 state,
1019 shake_vir,
1020 constr,
1021 bCalcVir, do_log, do_ene);
1022 wallcycle_start(wcycle, ewcUPDATE);
1023 /* if VV, compute the pressure and constraints */
1024 /* For VV2, we strictly only need this if using pressure
1025 * control, but we really would like to have accurate pressures
1026 * printed out.
1027 * Think about ways around this in the future?
1028 * For now, keep this choice in comments.
1030 /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
1031 /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
1032 bPres = TRUE;
1033 bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
1034 if (bCalcEner && ir->eI == eiVVAK)
1036 bSumEkinhOld = TRUE;
1038 /* for vv, the first half of the integration actually corresponds to the previous step.
1039 So we need information from the last step in the first half of the integration */
1040 if (bGStat || do_per_step(step-1, nstglobalcomm))
1042 wallcycle_stop(wcycle, ewcUPDATE);
1043 compute_globals(gstat, cr, ir, fr, ekind,
1044 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1045 mdatoms, nrnb, &vcm,
1046 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1047 constr, &nullSignaller, state->box,
1048 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1049 (bGStat ? CGLO_GSTAT : 0)
1050 | (bCalcEner ? CGLO_ENERGY : 0)
1051 | (bTemp ? CGLO_TEMPERATURE : 0)
1052 | (bPres ? CGLO_PRESSURE : 0)
1053 | (bPres ? CGLO_CONSTRAINT : 0)
1054 | (bStopCM ? CGLO_STOPCM : 0)
1055 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1056 | CGLO_SCALEEKIN
1058 /* explanation of above:
1059 a) We compute Ekin at the full time step
1060 if 1) we are using the AveVel Ekin, and it's not the
1061 initial step, or 2) if we are using AveEkin, but need the full
1062 time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1063 b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1064 EkinAveVel because it's needed for the pressure */
1065 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1066 top_global, &top, state->x.rvec_array(), state->box,
1067 &shouldCheckNumberOfBondedInteractions);
1068 if (bStopCM)
1070 process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1071 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1073 wallcycle_start(wcycle, ewcUPDATE);
1075 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1076 if (!bInitStep)
1078 if (bTrotter)
1080 m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */
1081 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1083 /* TODO This is only needed when we're about to write
1084 * a checkpoint, because we use it after the restart
1085 * (in a kludge?). But what should we be doing if
1086 * the startingBehavior is NewSimulation or bInitStep are true? */
1087 if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1089 copy_mat(shake_vir, state->svir_prev);
1090 copy_mat(force_vir, state->fvir_prev);
1092 if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1094 /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1095 enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1096 enerd->term[F_EKIN] = trace(ekind->ekin);
1099 else if (bExchanged)
1101 wallcycle_stop(wcycle, ewcUPDATE);
1102 /* We need the kinetic energy at minus the half step for determining
1103 * the full step kinetic energy and possibly for T-coupling.*/
1104 /* This may not be quite working correctly yet . . . . */
1105 compute_globals(gstat, cr, ir, fr, ekind,
1106 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1107 mdatoms, nrnb, &vcm,
1108 wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1109 constr, &nullSignaller, state->box,
1110 nullptr, &bSumEkinhOld,
1111 CGLO_GSTAT | CGLO_TEMPERATURE);
1112 wallcycle_start(wcycle, ewcUPDATE);
1115 /* if it's the initial step, we performed this first step just to get the constraint virial */
1116 if (ir->eI == eiVV && bInitStep)
1118 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1119 sfree(vbuf);
1121 wallcycle_stop(wcycle, ewcUPDATE);
1124 /* compute the conserved quantity */
1125 if (EI_VV(ir->eI))
1127 saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1128 if (ir->eI == eiVV)
1130 last_ekin = enerd->term[F_EKIN];
1132 if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1134 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1136 /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */
1137 if (ir->efep != efepNO)
1139 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1143 /* ######## END FIRST UPDATE STEP ############## */
1144 /* ######## If doing VV, we now have v(dt) ###### */
1145 if (bDoExpanded)
1147 /* perform extended ensemble sampling in lambda - we don't
1148 actually move to the new state before outputting
1149 statistics, but if performing simulated tempering, we
1150 do update the velocities and the tau_t. */
1152 lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1153 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1154 if (MASTER(cr))
1156 copy_df_history(state_global->dfhist, state->dfhist);
1160 // Copy coordinate from the GPU for the output if the update is offloaded and
1161 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1162 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork &&
1163 (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)))
1165 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1166 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1168 /* Now we have the energies and forces corresponding to the
1169 * coordinates at time t. We must output all of this before
1170 * the update.
1172 do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1173 ir, state, state_global, observablesHistory,
1174 top_global, fr,
1175 outf, energyOutput, ekind, f,
1176 checkpointHandler->isCheckpointingStep(),
1177 bRerunMD, bLastStep,
1178 mdrunOptions.writeConfout,
1179 bSumEkinhOld);
1180 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1181 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1183 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1184 if (startingBehavior != StartingBehavior::NewSimulation &&
1185 (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1187 copy_mat(state->svir_prev, shake_vir);
1188 copy_mat(state->fvir_prev, force_vir);
1191 stopHandler->setSignal();
1192 resetHandler->setSignal(walltime_accounting);
1194 if (bGStat || !PAR(cr))
1196 /* In parallel we only have to check for checkpointing in steps
1197 * where we do global communication,
1198 * otherwise the other nodes don't know.
1200 checkpointHandler->setSignal(walltime_accounting);
1203 /* ######### START SECOND UPDATE STEP ################# */
1205 /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1206 in preprocessing */
1208 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1210 gmx_bool bIfRandomize;
1211 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1212 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1213 if (constr && bIfRandomize)
1215 constrain_velocities(step, nullptr,
1216 state,
1217 tmp_vir,
1218 constr,
1219 bCalcVir, do_log, do_ene);
1222 /* Box is changed in update() when we do pressure coupling,
1223 * but we should still use the old box for energy corrections and when
1224 * writing it to the energy file, so it matches the trajectory files for
1225 * the same timestep above. Make a copy in a separate array.
1227 copy_mat(state->box, lastbox);
1229 dvdl_constr = 0;
1231 wallcycle_start(wcycle, ewcUPDATE);
1232 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1233 if (bTrotter)
1235 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1236 /* We can only do Berendsen coupling after we have summed
1237 * the kinetic energy or virial. Since the happens
1238 * in global_state after update, we should only do it at
1239 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1242 else
1244 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1245 update_pcouple_before_coordinates(fplog, step, ir, state,
1246 parrinellorahmanMu, M,
1247 bInitStep);
1250 if (EI_VV(ir->eI))
1252 /* velocity half-step update */
1253 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1254 ekind, M, &upd, etrtVELOCITY2,
1255 cr, constr);
1258 /* Above, initialize just copies ekinh into ekin,
1259 * it doesn't copy position (for VV),
1260 * and entire integrator for MD.
1263 if (ir->eI == eiVVAK)
1265 cbuf.resize(state->x.size());
1266 std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1269 /* With leap-frog type integrators we compute the kinetic energy
1270 * at a whole time step as the average of the half-time step kinetic
1271 * energies of two subsequent steps. Therefore we need to compute the
1272 * half step kinetic energy also if we need energies at the next step.
1274 const bool needHalfStepKineticEnergy = (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm));
1276 if (useGpuForUpdate)
1278 if (bNS)
1280 integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(), stateGpu->getForces(),
1281 top.idef, *mdatoms, ekind->ngtc);
1282 t_pbc pbc;
1283 set_pbc(&pbc, epbcXYZ, state->box);
1284 integrator->setPbc(&pbc);
1286 // Copy data to the GPU after buffers might have being reinitialized
1287 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1288 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1291 stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::All);
1293 // TODO: Use StepWorkload fields.
1294 bool useGpuFBufferOps = simulationWork.useGpuBufferOps && !(bCalcVir || bCalcEner);
1296 bool doTempCouple = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1297 bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1299 // This applies Leap-Frog, LINCS and SETTLE in succession
1300 integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local, useGpuFBufferOps),
1301 ir->delta_t, true, bCalcVir, shake_vir,
1302 doTempCouple, ekind->tcstat,
1303 doParrinelloRahman, ir->nstpcouple*ir->delta_t, M);
1305 // Copy velocities D2H after update if:
1306 // - Globals are computed this step (includes the energy output steps).
1307 // - Temperature is needed for the next step.
1308 if (bGStat || needHalfStepKineticEnergy)
1310 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1311 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1314 else
1316 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1317 ekind, M, &upd, etrtPOSITION, cr, constr);
1319 wallcycle_stop(wcycle, ewcUPDATE);
1321 constrain_coordinates(step, &dvdl_constr, state,
1322 shake_vir,
1323 &upd, constr,
1324 bCalcVir, do_log, do_ene);
1326 update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1327 cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1328 finish_update(ir, mdatoms,
1329 state, graph,
1330 nrnb, wcycle, &upd, constr);
1333 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1335 updatePrevStepPullCom(pull_work, state);
1338 if (ir->eI == eiVVAK)
1340 /* erase F_EKIN and F_TEMP here? */
1341 /* just compute the kinetic energy at the half step to perform a trotter step */
1342 compute_globals(gstat, cr, ir, fr, ekind,
1343 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1344 mdatoms, nrnb, &vcm,
1345 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1346 constr, &nullSignaller, lastbox,
1347 nullptr, &bSumEkinhOld,
1348 (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1350 wallcycle_start(wcycle, ewcUPDATE);
1351 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1352 /* now we know the scaling, we can compute the positions again */
1353 std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1355 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1356 ekind, M, &upd, etrtPOSITION, cr, constr);
1357 wallcycle_stop(wcycle, ewcUPDATE);
1359 /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1360 /* are the small terms in the shake_vir here due
1361 * to numerical errors, or are they important
1362 * physically? I'm thinking they are just errors, but not completely sure.
1363 * For now, will call without actually constraining, constr=NULL*/
1364 finish_update(ir, mdatoms,
1365 state, graph,
1366 nrnb, wcycle, &upd, nullptr);
1368 if (EI_VV(ir->eI))
1370 /* this factor or 2 correction is necessary
1371 because half of the constraint force is removed
1372 in the vv step, so we have to double it. See
1373 the Redmine issue #1255. It is not yet clear
1374 if the factor of 2 is exact, or just a very
1375 good approximation, and this will be
1376 investigated. The next step is to see if this
1377 can be done adding a dhdl contribution from the
1378 rattle step, but this is somewhat more
1379 complicated with the current code. Will be
1380 investigated, hopefully for 4.6.3. However,
1381 this current solution is much better than
1382 having it completely wrong.
1384 enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1386 else
1388 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1391 if (vsite != nullptr)
1393 wallcycle_start(wcycle, ewcVSITECONSTR);
1394 if (graph != nullptr)
1396 shift_self(graph, state->box, state->x.rvec_array());
1398 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1399 top.idef.iparams, top.idef.il,
1400 fr->ePBC, fr->bMolPBC, cr, state->box);
1402 if (graph != nullptr)
1404 unshift_self(graph, state->box, state->x.rvec_array());
1406 wallcycle_stop(wcycle, ewcVSITECONSTR);
1409 /* ############## IF NOT VV, Calculate globals HERE ############ */
1410 /* With Leap-Frog we can skip compute_globals at
1411 * non-communication steps, but we need to calculate
1412 * the kinetic energy one step before communication.
1415 // Organize to do inter-simulation signalling on steps if
1416 // and when algorithms require it.
1417 bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1419 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1421 // Copy coordinates when needed to stop the CM motion.
1422 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1424 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1425 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1427 // Since we're already communicating at this step, we
1428 // can propagate intra-simulation signals. Note that
1429 // check_nstglobalcomm has the responsibility for
1430 // choosing the value of nstglobalcomm that is one way
1431 // bGStat becomes true, so we can't get into a
1432 // situation where e.g. checkpointing can't be
1433 // signalled.
1434 bool doIntraSimSignal = true;
1435 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1437 compute_globals(gstat, cr, ir, fr, ekind,
1438 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1439 mdatoms, nrnb, &vcm,
1440 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1441 constr, &signaller,
1442 lastbox,
1443 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1444 (bGStat ? CGLO_GSTAT : 0)
1445 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1446 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1447 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1448 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1449 | CGLO_CONSTRAINT
1450 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1452 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1453 top_global, &top, state->x.rvec_array(), state->box,
1454 &shouldCheckNumberOfBondedInteractions);
1455 if (!EI_VV(ir->eI) && bStopCM)
1457 process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1458 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1460 // TODO: The special case of removing CM motion should be dealt more gracefully
1461 if (useGpuForUpdate)
1463 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1464 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1470 /* ############# END CALC EKIN AND PRESSURE ################# */
1472 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1473 the virial that should probably be addressed eventually. state->veta has better properies,
1474 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1475 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1477 if (ir->efep != efepNO && !EI_VV(ir->eI))
1479 /* Sum up the foreign energy and dhdl terms for md and sd.
1480 Currently done every step so that dhdl is correct in the .edr */
1481 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1484 update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1485 pres, force_vir, shake_vir,
1486 parrinellorahmanMu,
1487 state, nrnb, &upd);
1489 /* ################# END UPDATE STEP 2 ################# */
1490 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1492 /* The coordinates (x) were unshifted in update */
1493 if (!bGStat)
1495 /* We will not sum ekinh_old,
1496 * so signal that we still have to do it.
1498 bSumEkinhOld = TRUE;
1501 if (bCalcEner)
1503 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1505 /* use the directly determined last velocity, not actually the averaged half steps */
1506 if (bTrotter && ir->eI == eiVV)
1508 enerd->term[F_EKIN] = last_ekin;
1510 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1512 if (integratorHasConservedEnergyQuantity(ir))
1514 if (EI_VV(ir->eI))
1516 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1518 else
1520 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1523 /* ######### END PREPARING EDR OUTPUT ########### */
1526 /* Output stuff */
1527 if (MASTER(cr))
1529 if (fplog && do_log && bDoExpanded)
1531 /* only needed if doing expanded ensemble */
1532 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1533 state_global->dfhist, state->fep_state, ir->nstlog, step);
1535 if (bCalcEner)
1537 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1538 t, mdatoms->tmass, enerd, state,
1539 ir->fepvals, ir->expandedvals, lastbox,
1540 shake_vir, force_vir, total_vir, pres,
1541 ekind, mu_tot, constr);
1543 else
1545 energyOutput.recordNonEnergyStep();
1548 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1549 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1551 if (doSimulatedAnnealing)
1553 energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1555 if (do_log || do_ene || do_dr || do_or)
1557 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1558 do_log ? fplog : nullptr,
1559 step, t,
1560 fcd, awh.get());
1563 if (ir->bPull)
1565 pull_print_output(pull_work, step, t);
1568 if (do_per_step(step, ir->nstlog))
1570 if (fflush(fplog) != 0)
1572 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1576 if (bDoExpanded)
1578 /* Have to do this part _after_ outputting the logfile and the edr file */
1579 /* Gets written into the state at the beginning of next loop*/
1580 state->fep_state = lamnew;
1582 /* Print the remaining wall clock time for the run */
1583 if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1584 (do_verbose || gmx_got_usr_signal()) &&
1585 !bPMETunePrinting)
1587 if (shellfc)
1589 fprintf(stderr, "\n");
1591 print_time(stderr, walltime_accounting, step, ir, cr);
1594 /* Ion/water position swapping.
1595 * Not done in last step since trajectory writing happens before this call
1596 * in the MD loop and exchanges would be lost anyway. */
1597 bNeedRepartition = FALSE;
1598 if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1599 do_per_step(step, ir->swap->nstswap))
1601 bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1602 as_rvec_array(state->x.data()),
1603 state->box,
1604 MASTER(cr) && mdrunOptions.verbose,
1605 bRerunMD);
1607 if (bNeedRepartition && DOMAINDECOMP(cr))
1609 dd_collect_state(cr->dd, state, state_global);
1613 /* Replica exchange */
1614 bExchanged = FALSE;
1615 if (bDoReplEx)
1617 bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1618 state_global, enerd,
1619 state, step, t);
1622 if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1624 dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1625 state_global, *top_global, ir, imdSession,
1626 pull_work,
1627 state, &f, mdAtoms, &top, fr,
1628 vsite, constr,
1629 nrnb, wcycle, FALSE);
1630 shouldCheckNumberOfBondedInteractions = true;
1631 upd.setNumAtoms(state->natoms);
1634 bFirstStep = FALSE;
1635 bInitStep = FALSE;
1637 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1638 /* With all integrators, except VV, we need to retain the pressure
1639 * at the current step for coupling at the next step.
1641 if ((state->flags & (1U<<estPRES_PREV)) &&
1642 (bGStatEveryStep ||
1643 (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1645 /* Store the pressure in t_state for pressure coupling
1646 * at the next MD step.
1648 copy_mat(pres, state->pres_prev);
1651 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1653 if ( (membed != nullptr) && (!bLastStep) )
1655 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1658 cycles = wallcycle_stop(wcycle, ewcSTEP);
1659 if (DOMAINDECOMP(cr) && wcycle)
1661 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1664 /* increase the MD step number */
1665 step++;
1666 step_rel++;
1668 resetHandler->resetCounters(
1669 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1670 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1672 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1673 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1676 /* End of main MD loop */
1678 /* Closing TNG files can include compressing data. Therefore it is good to do that
1679 * before stopping the time measurements. */
1680 mdoutf_tng_close(outf);
1682 /* Stop measuring walltime */
1683 walltime_accounting_end_time(walltime_accounting);
1685 if (!thisRankHasDuty(cr, DUTY_PME))
1687 /* Tell the PME only node to finish */
1688 gmx_pme_send_finish(cr);
1691 if (MASTER(cr))
1693 if (ir->nstcalcenergy > 0)
1695 energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1696 energyOutput.printAverages(fplog, groups);
1699 done_mdoutf(outf);
1701 if (bPMETune)
1703 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1706 done_shellfc(fplog, shellfc, step_rel);
1708 if (useReplicaExchange && MASTER(cr))
1710 print_replica_exchange_statistics(fplog, repl_ex);
1713 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1715 if (fr->pmePpCommGpu)
1717 // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
1718 fr->pmePpCommGpu.reset();
1721 global_stat_destroy(gstat);