src/gromacs/mdrun/md.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the integrator for normal molecular dynamics simulations
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include <cinttypes>
  47 #include <cmath>
  48 #include <cstdio>
  49 #include <cstdlib>
  50
  51 #include <algorithm>
  52 #include <memory>
  53
  54 #include "gromacs/awh/awh.h"
  55 #include "gromacs/commandline/filenm.h"
  56 #include "gromacs/domdec/collect.h"
  57 #include "gromacs/domdec/dlbtiming.h"
  58 #include "gromacs/domdec/domdec.h"
  59 #include "gromacs/domdec/domdec_network.h"
  60 #include "gromacs/domdec/domdec_struct.h"
  61 #include "gromacs/domdec/mdsetup.h"
  62 #include "gromacs/domdec/partition.h"
  63 #include "gromacs/essentialdynamics/edsam.h"
  64 #include "gromacs/ewald/pme.h"
  65 #include "gromacs/ewald/pme_load_balancing.h"
  66 #include "gromacs/fileio/trxio.h"
  67 #include "gromacs/gmxlib/network.h"
  68 #include "gromacs/gmxlib/nrnb.h"
  69 #include "gromacs/gpu_utils/gpu_utils.h"
  70 #include "gromacs/imd/imd.h"
  71 #include "gromacs/listed_forces/manage_threading.h"
  72 #include "gromacs/math/functions.h"
  73 #include "gromacs/math/utilities.h"
  74 #include "gromacs/math/vec.h"
  75 #include "gromacs/math/vectypes.h"
  76 #include "gromacs/mdlib/checkpointhandler.h"
  77 #include "gromacs/mdlib/compute_io.h"
  78 #include "gromacs/mdlib/constr.h"
  79 #include "gromacs/mdlib/ebin.h"
  80 #include "gromacs/mdlib/enerdata_utils.h"
  81 #include "gromacs/mdlib/energyoutput.h"
  82 #include "gromacs/mdlib/expanded.h"
  83 #include "gromacs/mdlib/force.h"
  84 #include "gromacs/mdlib/force_flags.h"
  85 #include "gromacs/mdlib/forcerec.h"
  86 #include "gromacs/mdlib/leapfrog_cuda.h"
  87 #include "gromacs/mdlib/md_support.h"
  88 #include "gromacs/mdlib/mdatoms.h"
  89 #include "gromacs/mdlib/mdoutf.h"
  90 #include "gromacs/mdlib/membed.h"
  91 #include "gromacs/mdlib/resethandler.h"
  92 #include "gromacs/mdlib/sighandler.h"
  93 #include "gromacs/mdlib/simulationsignal.h"
  94 #include "gromacs/mdlib/stat.h"
  95 #include "gromacs/mdlib/stophandler.h"
  96 #include "gromacs/mdlib/tgroup.h"
  97 #include "gromacs/mdlib/trajectory_writing.h"
  98 #include "gromacs/mdlib/update.h"
  99 #include "gromacs/mdlib/vcm.h"
 100 #include "gromacs/mdlib/vsite.h"
 101 #include "gromacs/mdrunutility/handlerestart.h"
 102 #include "gromacs/mdrunutility/multisim.h"
 103 #include "gromacs/mdrunutility/printtime.h"
 104 #include "gromacs/mdtypes/awh_history.h"
 105 #include "gromacs/mdtypes/awh_params.h"
 106 #include "gromacs/mdtypes/commrec.h"
 107 #include "gromacs/mdtypes/df_history.h"
 108 #include "gromacs/mdtypes/energyhistory.h"
 109 #include "gromacs/mdtypes/fcdata.h"
 110 #include "gromacs/mdtypes/forcerec.h"
 111 #include "gromacs/mdtypes/group.h"
 112 #include "gromacs/mdtypes/inputrec.h"
 113 #include "gromacs/mdtypes/interaction_const.h"
 114 #include "gromacs/mdtypes/md_enums.h"
 115 #include "gromacs/mdtypes/mdatom.h"
 116 #include "gromacs/mdtypes/mdrunoptions.h"
 117 #include "gromacs/mdtypes/observableshistory.h"
 118 #include "gromacs/mdtypes/pullhistory.h"
 119 #include "gromacs/mdtypes/state.h"
 120 #include "gromacs/nbnxm/nbnxm.h"
 121 #include "gromacs/pbcutil/mshift.h"
 122 #include "gromacs/pbcutil/pbc.h"
 123 #include "gromacs/pulling/output.h"
 124 #include "gromacs/pulling/pull.h"
 125 #include "gromacs/swap/swapcoords.h"
 126 #include "gromacs/timing/wallcycle.h"
 127 #include "gromacs/timing/walltime_accounting.h"
 128 #include "gromacs/topology/atoms.h"
 129 #include "gromacs/topology/idef.h"
 130 #include "gromacs/topology/mtop_util.h"
 131 #include "gromacs/topology/topology.h"
 132 #include "gromacs/trajectory/trajectoryframe.h"
 133 #include "gromacs/utility/basedefinitions.h"
 134 #include "gromacs/utility/cstringutil.h"
 135 #include "gromacs/utility/fatalerror.h"
 136 #include "gromacs/utility/logger.h"
 137 #include "gromacs/utility/real.h"
 138 #include "gromacs/utility/smalloc.h"
 139
 140 #include "integrator.h"
 141 #include "replicaexchange.h"
 142 #include "shellfc.h"
 143
 144 #if GMX_FAHCORE
 145 #include "corewrap.h"
 146 #endif
 147
 148 using gmx::SimulationSignaller;
 149
 150 //! Whether the GPU version of Leap-Frog integrator should be used.
 151 static const bool c_useGpuLeapFrog = (getenv("GMX_INTEGRATE_GPU") != nullptr);
 152
 153 void gmx::Integrator::do_md()
 154 {
 155     // TODO Historically, the EM and MD "integrators" used different
 156     // names for the t_inputrec *parameter, but these must have the
 157     // same name, now that it's a member of a struct. We use this ir
 158     // alias to avoid a large ripple of nearly useless changes.
 159     // t_inputrec is being replaced by IMdpOptionsProvider, so this
 160     // will go away eventually.
 161     t_inputrec             *ir   = inputrec;
 162     int64_t                 step, step_rel;
 163     double                  t, t0 = ir->init_t, lam0[efptNR];
 164     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 165     gmx_bool                bNS, bNStList, bStopCM,
 166                             bFirstStep, bInitStep, bLastStep = FALSE;
 167     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 168     gmx_bool                do_ene, do_log, do_verbose;
 169     gmx_bool                bMasterState;
 170     int                     force_flags, cglo_flags;
 171     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
 172                             tmp_vir   = {{0}}, pres = {{0}};
 173     int                     i, m;
 174     rvec                    mu_tot;
 175     matrix                  parrinellorahmanMu, M;
 176     gmx_repl_ex_t           repl_ex = nullptr;
 177     gmx_localtop_t          top;
 178     PaddedVector<gmx::RVec> f {};
 179     gmx_global_stat_t       gstat;
 180     t_graph                *graph = nullptr;
 181     gmx_shellfc_t          *shellfc;
 182     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 183     gmx_bool                bTemp, bPres, bTrotter;
 184     real                    dvdl_constr;
 185     rvec                   *cbuf        = nullptr;
 186     int                     cbuf_nalloc = 0;
 187     matrix                  lastbox;
 188     int                     lamnew  = 0;
 189     /* for FEP */
 190     int                     nstfep = 0;
 191     double                  cycles;
 192     real                    saved_conserved_quantity = 0;
 193     real                    last_ekin                = 0;
 194     t_extmass               MassQ;
 195     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 196
 197     /* PME load balancing data for GPU kernels */
 198     gmx_bool              bPMETune         = FALSE;
 199     gmx_bool              bPMETunePrinting = FALSE;
 200
 201     bool                  bInteractiveMDstep = false;
 202
 203     /* Domain decomposition could incorrectly miss a bonded
 204        interaction, but checking for that requires a global
 205        communication stage, which does not otherwise happen in DD
 206        code. So we do that alongside the first global energy reduction
 207        after a new DD is made. These variables handle whether the
 208        check happens, and the result it returns. */
 209     bool              shouldCheckNumberOfBondedInteractions = false;
 210     int               totalNumberOfBondedInteractions       = -1;
 211
 212     SimulationSignals signals;
 213     // Most global communnication stages don't propagate mdrun
 214     // signals, and will use this object to achieve that.
 215     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
 216
 217     if (!mdrunOptions.writeConfout)
 218     {
 219         // This is on by default, and the main known use case for
 220         // turning it off is for convenience in benchmarking, which is
 221         // something that should not show up in the general user
 222         // interface.
 223         GMX_LOG(mdlog.info).asParagraph().
 224             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
 225     }
 226
 227     /* md-vv uses averaged full step velocities for T-control
 228        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 229        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 230     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
 231
 232     const bool bRerunMD      = false;
 233
 234     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
 235     bGStatEveryStep = (nstglobalcomm == 1);
 236
 237     SimulationGroups                  *groups = &top_global->groups;
 238
 239     std::unique_ptr<EssentialDynamics> ed = nullptr;
 240     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
 241     {
 242         /* Initialize essential dynamics sampling */
 243         ed = init_edsam(mdlog,
 244                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
 245                         top_global,
 246                         ir, cr, constr,
 247                         state_global, observablesHistory,
 248                         oenv,
 249                         startingBehavior);
 250     }
 251
 252     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
 253     Update upd(ir, deform);
 254     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
 255     if (startingBehavior != StartingBehavior::RestartWithAppending)
 256     {
 257         pleaseCiteCouplingAlgorithms(fplog, *ir);
 258     }
 259     init_nrnb(nrnb);
 260     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, oenv, wcycle,
 261                                          StartingBehavior::NewSimulation);
 262     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false);
 263
 264     /* Kinetic energy data */
 265     std::unique_ptr<gmx_ekindata_t> eKinData = std::make_unique<gmx_ekindata_t>();
 266     gmx_ekindata_t                 *ekind    = eKinData.get();
 267     init_ekindata(fplog, top_global, &(ir->opts), ekind);
 268     /* Copy the cos acceleration to the groups struct */
 269     ekind->cosacc.cos_accel = ir->cos_accel;
 270
 271     gstat = global_stat_init(ir);
 272
 273     /* Check for polarizable models and flexible constraints */
 274     shellfc = init_shell_flexcon(fplog,
 275                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
 276                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
 277
 278     {
 279         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
 280         if ((io > 2000) && MASTER(cr))
 281         {
 282             fprintf(stderr,
 283                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 284                     io);
 285         }
 286     }
 287
 288     // Local state only becomes valid now.
 289     std::unique_ptr<t_state> stateInstance;
 290     t_state *                state;
 291
 292     if (DOMAINDECOMP(cr))
 293     {
 294         dd_init_local_top(*top_global, &top);
 295
 296         stateInstance = std::make_unique<t_state>();
 297         state         = stateInstance.get();
 298         if (fr->nbv->useGpu())
 299         {
 300             changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
 301         }
 302         dd_init_local_state(cr->dd, state_global, state);
 303
 304         /* Distribute the charge groups over the nodes from the master node */
 305         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
 306                             state_global, *top_global, ir, imdSession,
 307                             pull_work,
 308                             state, &f, mdAtoms, &top, fr,
 309                             vsite, constr,
 310                             nrnb, nullptr, FALSE);
 311         shouldCheckNumberOfBondedInteractions = true;
 312         upd.setNumAtoms(state->natoms);
 313     }
 314     else
 315     {
 316         state_change_natoms(state_global, state_global->natoms);
 317         f.resizeWithPadding(state_global->natoms);
 318         /* Copy the pointer to the global state */
 319         state = state_global;
 320
 321         /* Generate and initialize new topology */
 322         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
 323                                   &graph, mdAtoms, constr, vsite, shellfc);
 324
 325         upd.setNumAtoms(state->natoms);
 326     }
 327
 328     auto mdatoms = mdAtoms->mdatoms();
 329
 330     // NOTE: The global state is no longer used at this point.
 331     // But state_global is still used as temporary storage space for writing
 332     // the global state to file and potentially for replica exchange.
 333     // (Global topology should persist.)
 334
 335     update_mdatoms(mdatoms, state->lambda[efptMASS]);
 336
 337     if (ir->bExpanded)
 338     {
 339         /* Check nstexpanded here, because the grompp check was broken */
 340         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
 341         {
 342             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
 343         }
 344         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
 345                                ir, state->dfhist);
 346     }
 347
 348     if (MASTER(cr))
 349     {
 350         if (startingBehavior != StartingBehavior::NewSimulation)
 351         {
 352             /* Restore from energy history if appending to output files */
 353             if (startingBehavior == StartingBehavior::RestartWithAppending)
 354             {
 355                 /* If no history is available (because a checkpoint is from before
 356                  * it was written) make a new one later, otherwise restore it.
 357                  */
 358                 if (observablesHistory->energyHistory)
 359                 {
 360                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
 361                 }
 362             }
 363             else if (observablesHistory->energyHistory)
 364             {
 365                 /* We might have read an energy history from checkpoint.
 366                  * As we are not appending, we want to restart the statistics.
 367                  * Free the allocated memory and reset the counts.
 368                  */
 369                 observablesHistory->energyHistory = {};
 370                 /* We might have read a pull history from checkpoint.
 371                  * We will still want to keep the statistics, so that the files
 372                  * can be joined and still be meaningful.
 373                  * This means that observablesHistory->pullHistory
 374                  * should not be reset.
 375                  */
 376             }
 377         }
 378         if (!observablesHistory->energyHistory)
 379         {
 380             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
 381         }
 382         if (!observablesHistory->pullHistory)
 383         {
 384             observablesHistory->pullHistory = std::make_unique<PullHistory>();
 385         }
 386         /* Set the initial energy history */
 387         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
 388     }
 389
 390     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
 391                            startingBehavior != StartingBehavior::NewSimulation);
 392
 393     // TODO: Remove this by converting AWH into a ForceProvider
 394     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
 395                                 shellfc != nullptr,
 396                                 opt2fn("-awh", nfile, fnm), pull_work);
 397
 398     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
 399     if (useReplicaExchange && MASTER(cr))
 400     {
 401         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
 402                                         replExParams);
 403     }
 404     /* PME tuning is only supported in the Verlet scheme, with PME for
 405      * Coulomb. It is not supported with only LJ PME. */
 406     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
 407                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
 408
 409     pme_load_balancing_t *pme_loadbal      = nullptr;
 410     if (bPMETune)
 411     {
 412         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
 413                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
 414                          &bPMETunePrinting);
 415     }
 416
 417     if (!ir->bContinuation)
 418     {
 419         if (state->flags & (1 << estV))
 420         {
 421             auto v = makeArrayRef(state->v);
 422             /* Set the velocities of vsites, shells and frozen atoms to zero */
 423             for (i = 0; i < mdatoms->homenr; i++)
 424             {
 425                 if (mdatoms->ptype[i] == eptVSite ||
 426                     mdatoms->ptype[i] == eptShell)
 427                 {
 428                     clear_rvec(v[i]);
 429                 }
 430                 else if (mdatoms->cFREEZE)
 431                 {
 432                     for (m = 0; m < DIM; m++)
 433                     {
 434                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 435                         {
 436                             v[i][m] = 0;
 437                         }
 438                     }
 439                 }
 440             }
 441         }
 442
 443         if (constr)
 444         {
 445             /* Constrain the initial coordinates and velocities */
 446             do_constrain_first(fplog, constr, ir, mdatoms, state);
 447         }
 448         if (vsite)
 449         {
 450             /* Construct the virtual sites for the initial configuration */
 451             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
 452                              top.idef.iparams, top.idef.il,
 453                              fr->ePBC, fr->bMolPBC, cr, state->box);
 454         }
 455     }
 456
 457     if (ir->efep != efepNO)
 458     {
 459         /* Set free energy calculation frequency as the greatest common
 460          * denominator of nstdhdl and repl_ex_nst. */
 461         nstfep = ir->fepvals->nstdhdl;
 462         if (ir->bExpanded)
 463         {
 464             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
 465         }
 466         if (useReplicaExchange)
 467         {
 468             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
 469         }
 470     }
 471
 472     /* Be REALLY careful about what flags you set here. You CANNOT assume
 473      * this is the first step, since we might be restarting from a checkpoint,
 474      * and in that case we should not do any modifications to the state.
 475      */
 476     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
 477
 478     // When restarting from a checkpoint, it can be appropriate to
 479     // initialize ekind from quantities in the checkpoint. Otherwise,
 480     // compute_globals must initialize ekind before the simulation
 481     // starts/restarts. However, only the master rank knows what was
 482     // found in the checkpoint file, so we have to communicate in
 483     // order to coordinate the restart.
 484     //
 485     // TODO Consider removing this communication if/when checkpoint
 486     // reading directly follows .tpr reading, because all ranks can
 487     // agree on hasReadEkinState at that time.
 488     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
 489     if (PAR(cr))
 490     {
 491         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
 492     }
 493     if (hasReadEkinState)
 494     {
 495         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
 496     }
 497
 498     cglo_flags = (CGLO_INITIALIZATION | CGLO_TEMPERATURE | CGLO_GSTAT
 499                   | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
 500                   | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
 501                   | (hasReadEkinState ? CGLO_READEKIN : 0));
 502
 503     bSumEkinhOld = FALSE;
 504
 505     t_vcm vcm(top_global->groups, *ir);
 506     reportComRemovalInfo(fplog, vcm);
 507
 508     /* To minimize communication, compute_globals computes the COM velocity
 509      * and the kinetic energy for the velocities without COM motion removed.
 510      * Thus to get the kinetic energy without the COM contribution, we need
 511      * to call compute_globals twice.
 512      */
 513     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
 514     {
 515         int cglo_flags_iteration = cglo_flags;
 516         if (bStopCM && cgloIteration == 0)
 517         {
 518             cglo_flags_iteration |= CGLO_STOPCM;
 519             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
 520         }
 521         compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 522                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 523                         constr, &nullSignaller, state->box,
 524                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
 525                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
 526     }
 527     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 528                                     top_global, &top, state,
 529                                     &shouldCheckNumberOfBondedInteractions);
 530     if (ir->eI == eiVVAK)
 531     {
 532         /* a second call to get the half step temperature initialized as well */
 533         /* we do the same call as above, but turn the pressure off -- internally to
 534            compute_globals, this is recognized as a velocity verlet half-step
 535            kinetic energy calculation.  This minimized excess variables, but
 536            perhaps loses some logic?*/
 537
 538         compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 539                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 540                         constr, &nullSignaller, state->box,
 541                         nullptr, &bSumEkinhOld,
 542                         cglo_flags & ~CGLO_PRESSURE);
 543     }
 544
 545     /* Calculate the initial half step temperature, and save the ekinh_old */
 546     if (startingBehavior == StartingBehavior::NewSimulation)
 547     {
 548         for (i = 0; (i < ir->opts.ngtc); i++)
 549         {
 550             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 551         }
 552     }
 553
 554     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 555        temperature control */
 556     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 557
 558     if (MASTER(cr))
 559     {
 560         if (!ir->bContinuation)
 561         {
 562             if (constr && ir->eConstrAlg == econtLINCS)
 563             {
 564                 fprintf(fplog,
 565                         "RMS relative constraint deviation after constraining: %.2e\n",
 566                         constr->rmsd());
 567             }
 568             if (EI_STATE_VELOCITY(ir->eI))
 569             {
 570                 real temp = enerd->term[F_TEMP];
 571                 if (ir->eI != eiVV)
 572                 {
 573                     /* Result of Ekin averaged over velocities of -half
 574                      * and +half step, while we only have -half step here.
 575                      */
 576                     temp *= 2;
 577                 }
 578                 fprintf(fplog, "Initial temperature: %g K\n", temp);
 579             }
 580         }
 581
 582         char tbuf[20];
 583         fprintf(stderr, "starting mdrun '%s'\n",
 584                 *(top_global->name));
 585         if (ir->nsteps >= 0)
 586         {
 587             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
 588         }
 589         else
 590         {
 591             sprintf(tbuf, "%s", "infinite");
 592         }
 593         if (ir->init_step > 0)
 594         {
 595             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 596                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
 597                     gmx_step_str(ir->init_step, sbuf2),
 598                     ir->init_step*ir->delta_t);
 599         }
 600         else
 601         {
 602             fprintf(stderr, "%s steps, %s ps.\n",
 603                     gmx_step_str(ir->nsteps, sbuf), tbuf);
 604         }
 605         fprintf(fplog, "\n");
 606     }
 607
 608     walltime_accounting_start_time(walltime_accounting);
 609     wallcycle_start(wcycle, ewcRUN);
 610     print_start(fplog, cr, walltime_accounting, "mdrun");
 611
 612 #if GMX_FAHCORE
 613     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 614     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
 615                                           NULL, 0);
 616     if (chkpt_ret == 0)
 617     {
 618         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
 619     }
 620 #endif
 621
 622     /***********************************************************
 623      *
 624      *             Loop over MD steps
 625      *
 626      ************************************************************/
 627     std::unique_ptr<LeapFrogCuda> integrator;
 628     if (c_useGpuLeapFrog)
 629     {
 630         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
 631         GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU.");
 632         GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
 633         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
 634         integrator = std::make_unique<LeapFrogCuda>(state->natoms);
 635         integrator->set(*mdatoms);
 636     }
 637
 638     bFirstStep       = TRUE;
 639     /* Skip the first Nose-Hoover integration when we get the state from tpx */
 640     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
 641     bSumEkinhOld     = FALSE;
 642     bExchanged       = FALSE;
 643     bNeedRepartition = FALSE;
 644
 645     bool simulationsShareState = false;
 646     int  nstSignalComm         = nstglobalcomm;
 647     {
 648         // TODO This implementation of ensemble orientation restraints is nasty because
 649         // a user can't just do multi-sim with single-sim orientation restraints.
 650         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
 651         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
 652
 653         // Replica exchange, ensemble restraints and AWH need all
 654         // simulations to remain synchronized, so they need
 655         // checkpoints and stop conditions to act on the same step, so
 656         // the propagation of such signals must take place between
 657         // simulations, not just within simulations.
 658         // TODO: Make algorithm initializers set these flags.
 659         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
 660
 661         if (simulationsShareState)
 662         {
 663             // Inter-simulation signal communication does not need to happen
 664             // often, so we use a minimum of 200 steps to reduce overhead.
 665             const int c_minimumInterSimulationSignallingInterval = 200;
 666             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
 667         }
 668     }
 669
 670     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
 671                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
 672                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
 673                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
 674
 675     auto checkpointHandler = std::make_unique<CheckpointHandler>(
 676                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
 677                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
 678                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
 679
 680     const bool resetCountersIsLocal = true;
 681     auto       resetHandler         = std::make_unique<ResetHandler>(
 682                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
 683                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
 684                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
 685
 686     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
 687
 688     step     = ir->init_step;
 689     step_rel = 0;
 690
 691     // TODO extract this to new multi-simulation module
 692     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
 693     {
 694         if (!multisim_int_all_are_equal(ms, ir->nsteps))
 695         {
 696             GMX_LOG(mdlog.warning).appendText(
 697                     "Note: The number of steps is not consistent across multi simulations,\n"
 698                     "but we are proceeding anyway!");
 699         }
 700         if (!multisim_int_all_are_equal(ms, ir->init_step))
 701         {
 702             GMX_LOG(mdlog.warning).appendText(
 703                     "Note: The initial step is not consistent across multi simulations,\n"
 704                     "but we are proceeding anyway!");
 705         }
 706     }
 707
 708     /* and stop now if we should */
 709     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
 710     while (!bLastStep)
 711     {
 712
 713         /* Determine if this is a neighbor search step */
 714         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 715
 716         if (bPMETune && bNStList)
 717         {
 718             /* PME grid + cut-off optimization with GPUs or PME nodes */
 719             pme_loadbal_do(pme_loadbal, cr,
 720                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
 721                            fplog, mdlog,
 722                            *ir, fr, *state,
 723                            wcycle,
 724                            step, step_rel,
 725                            &bPMETunePrinting);
 726         }
 727
 728         wallcycle_start(wcycle, ewcSTEP);
 729
 730         bLastStep = (step_rel == ir->nsteps);
 731         t         = t0 + step*ir->delta_t;
 732
 733         // TODO Refactor this, so that nstfep does not need a default value of zero
 734         if (ir->efep != efepNO || ir->bSimTemp)
 735         {
 736             /* find and set the current lambdas */
 737             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
 738
 739             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
 740             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
 741             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
 742                             && (ir->bExpanded) && (step > 0) &&
 743                             (startingBehavior == StartingBehavior::NewSimulation));
 744         }
 745
 746         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
 747                      do_per_step(step, replExParams.exchangeInterval));
 748
 749         if (doSimulatedAnnealing)
 750         {
 751             update_annealing_target_temp(ir, t, &upd);
 752         }
 753
 754         /* Stop Center of Mass motion */
 755         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 756
 757         /* Determine whether or not to do Neighbour Searching */
 758         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
 759
 760         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
 761
 762         /* do_log triggers energy and virial calculation. Because this leads
 763          * to different code paths, forces can be different. Thus for exact
 764          * continuation we should avoid extra log output.
 765          * Note that the || bLastStep can result in non-exact continuation
 766          * beyond the last step. But we don't consider that to be an issue.
 767          */
 768         do_log     =
 769             (do_per_step(step, ir->nstlog) ||
 770              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
 771              bLastStep);
 772         do_verbose = mdrunOptions.verbose &&
 773             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
 774
 775         if (bNS && !(bFirstStep && ir->bContinuation))
 776         {
 777             bMasterState = FALSE;
 778             /* Correct the new box if it is too skewed */
 779             if (inputrecDynamicBox(ir))
 780             {
 781                 if (correct_box(fplog, step, state->box, graph))
 782                 {
 783                     bMasterState = TRUE;
 784                 }
 785             }
 786             if (DOMAINDECOMP(cr) && bMasterState)
 787             {
 788                 dd_collect_state(cr->dd, state, state_global);
 789             }
 790
 791             if (DOMAINDECOMP(cr))
 792             {
 793                 /* Repartition the domain decomposition */
 794                 dd_partition_system(fplog, mdlog, step, cr,
 795                                     bMasterState, nstglobalcomm,
 796                                     state_global, *top_global, ir, imdSession,
 797                                     pull_work,
 798                                     state, &f, mdAtoms, &top, fr,
 799                                     vsite, constr,
 800                                     nrnb, wcycle,
 801                                     do_verbose && !bPMETunePrinting);
 802                 shouldCheckNumberOfBondedInteractions = true;
 803                 upd.setNumAtoms(state->natoms);
 804             }
 805         }
 806
 807         if (MASTER(cr) && do_log)
 808         {
 809             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
 810         }
 811
 812         if (ir->efep != efepNO)
 813         {
 814             update_mdatoms(mdatoms, state->lambda[efptMASS]);
 815         }
 816
 817         if (bExchanged)
 818         {
 819
 820             /* We need the kinetic energy at minus the half step for determining
 821              * the full step kinetic energy and possibly for T-coupling.*/
 822             /* This may not be quite working correctly yet . . . . */
 823             compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 824                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
 825                             constr, &nullSignaller, state->box,
 826                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
 827                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
 828             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 829                                             top_global, &top, state,
 830                                             &shouldCheckNumberOfBondedInteractions);
 831         }
 832         clear_mat(force_vir);
 833
 834         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
 835
 836         /* Determine the energy and pressure:
 837          * at nstcalcenergy steps and at energy output steps (set below).
 838          */
 839         if (EI_VV(ir->eI) && (!bInitStep))
 840         {
 841             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 842             bCalcVir      = bCalcEnerStep ||
 843                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
 844         }
 845         else
 846         {
 847             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 848             bCalcVir      = bCalcEnerStep ||
 849                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 850         }
 851         bCalcEner = bCalcEnerStep;
 852
 853         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 854
 855         if (do_ene || do_log || bDoReplEx)
 856         {
 857             bCalcVir  = TRUE;
 858             bCalcEner = TRUE;
 859         }
 860
 861         /* Do we need global communication ? */
 862         bGStat = (bCalcVir || bCalcEner || bStopCM ||
 863                   do_per_step(step, nstglobalcomm) ||
 864                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
 865
 866         force_flags = (GMX_FORCE_STATECHANGED |
 867                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
 868                        GMX_FORCE_ALLFORCES |
 869                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 870                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 871                        (bDoFEP ? GMX_FORCE_DHDL : 0)
 872                        );
 873
 874         if (shellfc)
 875         {
 876             /* Now is the time to relax the shells */
 877             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
 878                                 enforcedRotation, step,
 879                                 ir, imdSession, pull_work, bNS, force_flags, &top,
 880                                 constr, enerd, fcd,
 881                                 state, f.arrayRefWithPadding(), force_vir, mdatoms,
 882                                 nrnb, wcycle, graph,
 883                                 shellfc, fr, ppForceWorkload, t, mu_tot,
 884                                 vsite,
 885                                 ddBalanceRegionHandler);
 886         }
 887         else
 888         {
 889             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
 890                (or the AWH update will be performed twice for one step when continuing). It would be best to
 891                call this update function from do_md_trajectory_writing but that would occur after do_force.
 892                One would have to divide the update_awh function into one function applying the AWH force
 893                and one doing the AWH bias update. The update AWH bias function could then be called after
 894                do_md_trajectory_writing (then containing update_awh_history).
 895                The checkpointing will in the future probably moved to the start of the md loop which will
 896                rid of this issue. */
 897             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
 898             {
 899                 awh->updateHistory(state_global->awhHistory.get());
 900             }
 901
 902             /* The coordinates (x) are shifted (to get whole molecules)
 903              * in do_force.
 904              * This is parallellized as well, and does communication too.
 905              * Check comments in sim_util.c
 906              */
 907             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
 908                      pull_work,
 909                      step, nrnb, wcycle, &top,
 910                      state->box, state->x.arrayRefWithPadding(), &state->hist,
 911                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
 912                      state->lambda, graph,
 913                      fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
 914                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
 915                      ddBalanceRegionHandler);
 916         }
 917
 918         if (EI_VV(ir->eI) && startingBehavior == StartingBehavior::NewSimulation)
 919         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 920         {
 921             rvec *vbuf = nullptr;
 922
 923             wallcycle_start(wcycle, ewcUPDATE);
 924             if (ir->eI == eiVV && bInitStep)
 925             {
 926                 /* if using velocity verlet with full time step Ekin,
 927                  * take the first half step only to compute the
 928                  * virial for the first step. From there,
 929                  * revert back to the initial coordinates
 930                  * so that the input is actually the initial step.
 931                  */
 932                 snew(vbuf, state->natoms);
 933                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
 934             }
 935             else
 936             {
 937                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
 938                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
 939             }
 940
 941             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
 942                           ekind, M, &upd, etrtVELOCITY1,
 943                           cr, constr);
 944
 945             wallcycle_stop(wcycle, ewcUPDATE);
 946             constrain_velocities(step, nullptr,
 947                                  state,
 948                                  shake_vir,
 949                                  constr,
 950                                  bCalcVir, do_log, do_ene);
 951             wallcycle_start(wcycle, ewcUPDATE);
 952             /* if VV, compute the pressure and constraints */
 953             /* For VV2, we strictly only need this if using pressure
 954              * control, but we really would like to have accurate pressures
 955              * printed out.
 956              * Think about ways around this in the future?
 957              * For now, keep this choice in comments.
 958              */
 959             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
 960             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
 961             bPres = TRUE;
 962             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
 963             if (bCalcEner && ir->eI == eiVVAK)
 964             {
 965                 bSumEkinhOld = TRUE;
 966             }
 967             /* for vv, the first half of the integration actually corresponds to the previous step.
 968                So we need information from the last step in the first half of the integration */
 969             if (bGStat || do_per_step(step-1, nstglobalcomm))
 970             {
 971                 wallcycle_stop(wcycle, ewcUPDATE);
 972                 compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 973                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 974                                 constr, &nullSignaller, state->box,
 975                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
 976                                 (bGStat ? CGLO_GSTAT : 0)
 977                                 | (bCalcEner ? CGLO_ENERGY : 0)
 978                                 | (bTemp ? CGLO_TEMPERATURE : 0)
 979                                 | (bPres ? CGLO_PRESSURE : 0)
 980                                 | (bPres ? CGLO_CONSTRAINT : 0)
 981                                 | (bStopCM ? CGLO_STOPCM : 0)
 982                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
 983                                 | CGLO_SCALEEKIN
 984                                 );
 985                 /* explanation of above:
 986                    a) We compute Ekin at the full time step
 987                    if 1) we are using the AveVel Ekin, and it's not the
 988                    initial step, or 2) if we are using AveEkin, but need the full
 989                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 990                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 991                    EkinAveVel because it's needed for the pressure */
 992                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 993                                                 top_global, &top, state,
 994                                                 &shouldCheckNumberOfBondedInteractions);
 995                 wallcycle_start(wcycle, ewcUPDATE);
 996             }
 997             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 998             if (!bInitStep)
 999             {
1000                 if (bTrotter)
1001                 {
1002                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1003                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1004
1005                     /* TODO This is only needed when we're about to write
1006                      * a checkpoint, because we use it after the restart
1007                      * (in a kludge?). But what should we be doing if
1008                      * the startingBehavior is NewSimulation or bInitStep are true? */
1009                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1010                     {
1011                         copy_mat(shake_vir, state->svir_prev);
1012                         copy_mat(force_vir, state->fvir_prev);
1013                     }
1014                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1015                     {
1016                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1017                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1018                         enerd->term[F_EKIN] = trace(ekind->ekin);
1019                     }
1020                 }
1021                 else if (bExchanged)
1022                 {
1023                     wallcycle_stop(wcycle, ewcUPDATE);
1024                     /* We need the kinetic energy at minus the half step for determining
1025                      * the full step kinetic energy and possibly for T-coupling.*/
1026                     /* This may not be quite working correctly yet . . . . */
1027                     compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
1028                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1029                                     constr, &nullSignaller, state->box,
1030                                     nullptr, &bSumEkinhOld,
1031                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1032                     wallcycle_start(wcycle, ewcUPDATE);
1033                 }
1034             }
1035             /* if it's the initial step, we performed this first step just to get the constraint virial */
1036             if (ir->eI == eiVV && bInitStep)
1037             {
1038                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1039                 sfree(vbuf);
1040             }
1041             wallcycle_stop(wcycle, ewcUPDATE);
1042         }
1043
1044         /* compute the conserved quantity */
1045         if (EI_VV(ir->eI))
1046         {
1047             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1048             if (ir->eI == eiVV)
1049             {
1050                 last_ekin = enerd->term[F_EKIN];
1051             }
1052             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1053             {
1054                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1055             }
1056             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1057             if (ir->efep != efepNO)
1058             {
1059                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1060             }
1061         }
1062
1063         /* ########  END FIRST UPDATE STEP  ############## */
1064         /* ########  If doing VV, we now have v(dt) ###### */
1065         if (bDoExpanded)
1066         {
1067             /* perform extended ensemble sampling in lambda - we don't
1068                actually move to the new state before outputting
1069                statistics, but if performing simulated tempering, we
1070                do update the velocities and the tau_t. */
1071
1072             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1073             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1074             if (MASTER(cr))
1075             {
1076                 copy_df_history(state_global->dfhist, state->dfhist);
1077             }
1078         }
1079
1080         /* Now we have the energies and forces corresponding to the
1081          * coordinates at time t. We must output all of this before
1082          * the update.
1083          */
1084         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1085                                  ir, state, state_global, observablesHistory,
1086                                  top_global, fr,
1087                                  outf, energyOutput, ekind, f,
1088                                  checkpointHandler->isCheckpointingStep(),
1089                                  bRerunMD, bLastStep,
1090                                  mdrunOptions.writeConfout,
1091                                  bSumEkinhOld);
1092         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1093         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1094
1095         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1096         if (startingBehavior != StartingBehavior::NewSimulation &&
1097             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1098         {
1099             copy_mat(state->svir_prev, shake_vir);
1100             copy_mat(state->fvir_prev, force_vir);
1101         }
1102
1103         stopHandler->setSignal();
1104         resetHandler->setSignal(walltime_accounting);
1105
1106         if (bGStat || !PAR(cr))
1107         {
1108             /* In parallel we only have to check for checkpointing in steps
1109              * where we do global communication,
1110              *  otherwise the other nodes don't know.
1111              */
1112             checkpointHandler->setSignal(walltime_accounting);
1113         }
1114
1115         /* #########   START SECOND UPDATE STEP ################# */
1116
1117         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1118            in preprocessing */
1119
1120         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1121         {
1122             gmx_bool bIfRandomize;
1123             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1124             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1125             if (constr && bIfRandomize)
1126             {
1127                 constrain_velocities(step, nullptr,
1128                                      state,
1129                                      tmp_vir,
1130                                      constr,
1131                                      bCalcVir, do_log, do_ene);
1132             }
1133         }
1134         /* Box is changed in update() when we do pressure coupling,
1135          * but we should still use the old box for energy corrections and when
1136          * writing it to the energy file, so it matches the trajectory files for
1137          * the same timestep above. Make a copy in a separate array.
1138          */
1139         copy_mat(state->box, lastbox);
1140
1141         dvdl_constr = 0;
1142
1143         wallcycle_start(wcycle, ewcUPDATE);
1144         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1145         if (bTrotter)
1146         {
1147             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1148             /* We can only do Berendsen coupling after we have summed
1149              * the kinetic energy or virial. Since the happens
1150              * in global_state after update, we should only do it at
1151              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1152              */
1153         }
1154         else
1155         {
1156             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1157             update_pcouple_before_coordinates(fplog, step, ir, state,
1158                                               parrinellorahmanMu, M,
1159                                               bInitStep);
1160         }
1161
1162         if (EI_VV(ir->eI))
1163         {
1164             /* velocity half-step update */
1165             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1166                           ekind, M, &upd, etrtVELOCITY2,
1167                           cr, constr);
1168         }
1169
1170         /* Above, initialize just copies ekinh into ekin,
1171          * it doesn't copy position (for VV),
1172          * and entire integrator for MD.
1173          */
1174
1175         if (ir->eI == eiVVAK)
1176         {
1177             /* We probably only need md->homenr, not state->natoms */
1178             if (state->natoms > cbuf_nalloc)
1179             {
1180                 cbuf_nalloc = state->natoms;
1181                 srenew(cbuf, cbuf_nalloc);
1182             }
1183             copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms);
1184         }
1185
1186
1187         if (c_useGpuLeapFrog)
1188         {
1189             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1190             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1191             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1192             integrator->integrate(ir->delta_t);
1193             integrator->copyCoordinatesFromGpu(upd.xp()->rvec_array());
1194             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1195         }
1196         else
1197         {
1198             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1199                           ekind, M, &upd, etrtPOSITION, cr, constr);
1200         }
1201         wallcycle_stop(wcycle, ewcUPDATE);
1202
1203         constrain_coordinates(step, &dvdl_constr, state,
1204                               shake_vir,
1205                               &upd, constr,
1206                               bCalcVir, do_log, do_ene);
1207         update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1208                               cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1209         finish_update(ir, mdatoms,
1210                       state, graph,
1211                       nrnb, wcycle, &upd, constr);
1212
1213         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1214         {
1215             updatePrevStepPullCom(pull_work, state);
1216         }
1217
1218         if (ir->eI == eiVVAK)
1219         {
1220             /* erase F_EKIN and F_TEMP here? */
1221             /* just compute the kinetic energy at the half step to perform a trotter step */
1222             compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
1223                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1224                             constr, &nullSignaller, lastbox,
1225                             nullptr, &bSumEkinhOld,
1226                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1227                             );
1228             wallcycle_start(wcycle, ewcUPDATE);
1229             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1230             /* now we know the scaling, we can compute the positions again again */
1231             copy_rvecn(cbuf, as_rvec_array(state->x.data()), 0, state->natoms);
1232
1233             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1234                           ekind, M, &upd, etrtPOSITION, cr, constr);
1235             wallcycle_stop(wcycle, ewcUPDATE);
1236
1237             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1238             /* are the small terms in the shake_vir here due
1239              * to numerical errors, or are they important
1240              * physically? I'm thinking they are just errors, but not completely sure.
1241              * For now, will call without actually constraining, constr=NULL*/
1242             finish_update(ir, mdatoms,
1243                           state, graph,
1244                           nrnb, wcycle, &upd, nullptr);
1245         }
1246         if (EI_VV(ir->eI))
1247         {
1248             /* this factor or 2 correction is necessary
1249                because half of the constraint force is removed
1250                in the vv step, so we have to double it.  See
1251                the Redmine issue #1255.  It is not yet clear
1252                if the factor of 2 is exact, or just a very
1253                good approximation, and this will be
1254                investigated.  The next step is to see if this
1255                can be done adding a dhdl contribution from the
1256                rattle step, but this is somewhat more
1257                complicated with the current code. Will be
1258                investigated, hopefully for 4.6.3. However,
1259                this current solution is much better than
1260                having it completely wrong.
1261              */
1262             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1263         }
1264         else
1265         {
1266             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1267         }
1268
1269         if (vsite != nullptr)
1270         {
1271             wallcycle_start(wcycle, ewcVSITECONSTR);
1272             if (graph != nullptr)
1273             {
1274                 shift_self(graph, state->box, state->x.rvec_array());
1275             }
1276             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1277                              top.idef.iparams, top.idef.il,
1278                              fr->ePBC, fr->bMolPBC, cr, state->box);
1279
1280             if (graph != nullptr)
1281             {
1282                 unshift_self(graph, state->box, state->x.rvec_array());
1283             }
1284             wallcycle_stop(wcycle, ewcVSITECONSTR);
1285         }
1286
1287         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1288         /* With Leap-Frog we can skip compute_globals at
1289          * non-communication steps, but we need to calculate
1290          * the kinetic energy one step before communication.
1291          */
1292         {
1293             // Organize to do inter-simulation signalling on steps if
1294             // and when algorithms require it.
1295             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1296
1297             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1298             {
1299                 // Since we're already communicating at this step, we
1300                 // can propagate intra-simulation signals. Note that
1301                 // check_nstglobalcomm has the responsibility for
1302                 // choosing the value of nstglobalcomm that is one way
1303                 // bGStat becomes true, so we can't get into a
1304                 // situation where e.g. checkpointing can't be
1305                 // signalled.
1306                 bool                doIntraSimSignal = true;
1307                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1308
1309                 compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
1310                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1311                                 constr, &signaller,
1312                                 lastbox,
1313                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1314                                 (bGStat ? CGLO_GSTAT : 0)
1315                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1316                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1317                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1318                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1319                                 | CGLO_CONSTRAINT
1320                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1321                                 );
1322                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1323                                                 top_global, &top, state,
1324                                                 &shouldCheckNumberOfBondedInteractions);
1325             }
1326         }
1327
1328         /* #############  END CALC EKIN AND PRESSURE ################# */
1329
1330         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1331            the virial that should probably be addressed eventually. state->veta has better properies,
1332            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1333            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1334
1335         if (ir->efep != efepNO && !EI_VV(ir->eI))
1336         {
1337             /* Sum up the foreign energy and dhdl terms for md and sd.
1338                Currently done every step so that dhdl is correct in the .edr */
1339             sum_dhdl(enerd, state->lambda, ir->fepvals);
1340         }
1341
1342         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1343                                          pres, force_vir, shake_vir,
1344                                          parrinellorahmanMu,
1345                                          state, nrnb, &upd);
1346
1347         /* ################# END UPDATE STEP 2 ################# */
1348         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1349
1350         /* The coordinates (x) were unshifted in update */
1351         if (!bGStat)
1352         {
1353             /* We will not sum ekinh_old,
1354              * so signal that we still have to do it.
1355              */
1356             bSumEkinhOld = TRUE;
1357         }
1358
1359         if (bCalcEner)
1360         {
1361             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1362
1363             /* use the directly determined last velocity, not actually the averaged half steps */
1364             if (bTrotter && ir->eI == eiVV)
1365             {
1366                 enerd->term[F_EKIN] = last_ekin;
1367             }
1368             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1369
1370             if (integratorHasConservedEnergyQuantity(ir))
1371             {
1372                 if (EI_VV(ir->eI))
1373                 {
1374                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1375                 }
1376                 else
1377                 {
1378                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1379                 }
1380             }
1381             /* #########  END PREPARING EDR OUTPUT  ###########  */
1382         }
1383
1384         /* Output stuff */
1385         if (MASTER(cr))
1386         {
1387             if (fplog && do_log && bDoExpanded)
1388             {
1389                 /* only needed if doing expanded ensemble */
1390                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1391                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1392             }
1393             if (bCalcEner)
1394             {
1395                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1396                                                  t, mdatoms->tmass, enerd, state,
1397                                                  ir->fepvals, ir->expandedvals, lastbox,
1398                                                  shake_vir, force_vir, total_vir, pres,
1399                                                  ekind, mu_tot, constr);
1400             }
1401             else
1402             {
1403                 energyOutput.recordNonEnergyStep();
1404             }
1405
1406             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1407             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1408
1409             energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1410             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1411                                                do_log ? fplog : nullptr,
1412                                                step, t,
1413                                                fcd, awh.get());
1414
1415             if (ir->bPull)
1416             {
1417                 pull_print_output(pull_work, step, t);
1418             }
1419
1420             if (do_per_step(step, ir->nstlog))
1421             {
1422                 if (fflush(fplog) != 0)
1423                 {
1424                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1425                 }
1426             }
1427         }
1428         if (bDoExpanded)
1429         {
1430             /* Have to do this part _after_ outputting the logfile and the edr file */
1431             /* Gets written into the state at the beginning of next loop*/
1432             state->fep_state = lamnew;
1433         }
1434         /* Print the remaining wall clock time for the run */
1435         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1436             (do_verbose || gmx_got_usr_signal()) &&
1437             !bPMETunePrinting)
1438         {
1439             if (shellfc)
1440             {
1441                 fprintf(stderr, "\n");
1442             }
1443             print_time(stderr, walltime_accounting, step, ir, cr);
1444         }
1445
1446         /* Ion/water position swapping.
1447          * Not done in last step since trajectory writing happens before this call
1448          * in the MD loop and exchanges would be lost anyway. */
1449         bNeedRepartition = FALSE;
1450         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1451             do_per_step(step, ir->swap->nstswap))
1452         {
1453             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1454                                              as_rvec_array(state->x.data()),
1455                                              state->box,
1456                                              MASTER(cr) && mdrunOptions.verbose,
1457                                              bRerunMD);
1458
1459             if (bNeedRepartition && DOMAINDECOMP(cr))
1460             {
1461                 dd_collect_state(cr->dd, state, state_global);
1462             }
1463         }
1464
1465         /* Replica exchange */
1466         bExchanged = FALSE;
1467         if (bDoReplEx)
1468         {
1469             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1470                                           state_global, enerd,
1471                                           state, step, t);
1472         }
1473
1474         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1475         {
1476             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1477                                 state_global, *top_global, ir, imdSession,
1478                                 pull_work,
1479                                 state, &f, mdAtoms, &top, fr,
1480                                 vsite, constr,
1481                                 nrnb, wcycle, FALSE);
1482             shouldCheckNumberOfBondedInteractions = true;
1483             upd.setNumAtoms(state->natoms);
1484         }
1485
1486         bFirstStep             = FALSE;
1487         bInitStep              = FALSE;
1488
1489         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1490         /* With all integrators, except VV, we need to retain the pressure
1491          * at the current step for coupling at the next step.
1492          */
1493         if ((state->flags & (1<<estPRES_PREV)) &&
1494             (bGStatEveryStep ||
1495              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1496         {
1497             /* Store the pressure in t_state for pressure coupling
1498              * at the next MD step.
1499              */
1500             copy_mat(pres, state->pres_prev);
1501         }
1502
1503         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1504
1505         if ( (membed != nullptr) && (!bLastStep) )
1506         {
1507             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1508         }
1509
1510         cycles = wallcycle_stop(wcycle, ewcSTEP);
1511         if (DOMAINDECOMP(cr) && wcycle)
1512         {
1513             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1514         }
1515
1516         /* increase the MD step number */
1517         step++;
1518         step_rel++;
1519
1520         resetHandler->resetCounters(
1521                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1522                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1523
1524         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1525         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1526
1527     }
1528     /* End of main MD loop */
1529
1530     /* Closing TNG files can include compressing data. Therefore it is good to do that
1531      * before stopping the time measurements. */
1532     mdoutf_tng_close(outf);
1533
1534     /* Stop measuring walltime */
1535     walltime_accounting_end_time(walltime_accounting);
1536
1537     if (!thisRankHasDuty(cr, DUTY_PME))
1538     {
1539         /* Tell the PME only node to finish */
1540         gmx_pme_send_finish(cr);
1541     }
1542
1543     if (MASTER(cr))
1544     {
1545         if (ir->nstcalcenergy > 0)
1546         {
1547             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1548             energyOutput.printAverages(fplog, groups);
1549         }
1550     }
1551     done_mdoutf(outf);
1552
1553     if (bPMETune)
1554     {
1555         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1556     }
1557
1558     done_shellfc(fplog, shellfc, step_rel);
1559
1560     if (useReplicaExchange && MASTER(cr))
1561     {
1562         print_replica_exchange_statistics(fplog, repl_ex);
1563     }
1564
1565     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1566
1567     global_stat_destroy(gstat);
1568
1569 }