src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/domdec.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/localatomsetmanager.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/ewald/ewald_utils.h"
  66 #include "gromacs/ewald/pme.h"
  67 #include "gromacs/ewald/pme_gpu_program.h"
  68 #include "gromacs/fileio/checkpoint.h"
  69 #include "gromacs/fileio/gmxfio.h"
  70 #include "gromacs/fileio/oenv.h"
  71 #include "gromacs/fileio/tpxio.h"
  72 #include "gromacs/gmxlib/network.h"
  73 #include "gromacs/gmxlib/nrnb.h"
  74 #include "gromacs/gpu_utils/gpu_utils.h"
  75 #include "gromacs/hardware/cpuinfo.h"
  76 #include "gromacs/hardware/detecthardware.h"
  77 #include "gromacs/hardware/printhardware.h"
  78 #include "gromacs/imd/imd.h"
  79 #include "gromacs/listed_forces/disre.h"
  80 #include "gromacs/listed_forces/gpubonded.h"
  81 #include "gromacs/listed_forces/orires.h"
  82 #include "gromacs/math/functions.h"
  83 #include "gromacs/math/utilities.h"
  84 #include "gromacs/math/vec.h"
  85 #include "gromacs/mdlib/boxdeformation.h"
  86 #include "gromacs/mdlib/broadcaststructs.h"
  87 #include "gromacs/mdlib/calc_verletbuf.h"
  88 #include "gromacs/mdlib/dispersioncorrection.h"
  89 #include "gromacs/mdlib/enerdata_utils.h"
  90 #include "gromacs/mdlib/force.h"
  91 #include "gromacs/mdlib/forcerec.h"
  92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  93 #include "gromacs/mdlib/makeconstraints.h"
  94 #include "gromacs/mdlib/md_support.h"
  95 #include "gromacs/mdlib/mdatoms.h"
  96 #include "gromacs/mdlib/membed.h"
  97 #include "gromacs/mdlib/qmmm.h"
  98 #include "gromacs/mdlib/sighandler.h"
  99 #include "gromacs/mdlib/stophandler.h"
 100 #include "gromacs/mdrun/mdmodules.h"
 101 #include "gromacs/mdrun/simulationcontext.h"
 102 #include "gromacs/mdrunutility/handlerestart.h"
 103 #include "gromacs/mdrunutility/logging.h"
 104 #include "gromacs/mdrunutility/multisim.h"
 105 #include "gromacs/mdrunutility/printtime.h"
 106 #include "gromacs/mdrunutility/threadaffinity.h"
 107 #include "gromacs/mdtypes/commrec.h"
 108 #include "gromacs/mdtypes/enerdata.h"
 109 #include "gromacs/mdtypes/fcdata.h"
 110 #include "gromacs/mdtypes/group.h"
 111 #include "gromacs/mdtypes/inputrec.h"
 112 #include "gromacs/mdtypes/md_enums.h"
 113 #include "gromacs/mdtypes/mdrunoptions.h"
 114 #include "gromacs/mdtypes/observableshistory.h"
 115 #include "gromacs/mdtypes/simulation_workload.h"
 116 #include "gromacs/mdtypes/state.h"
 117 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 118 #include "gromacs/nbnxm/nbnxm.h"
 119 #include "gromacs/nbnxm/pairlist_tuning.h"
 120 #include "gromacs/pbcutil/pbc.h"
 121 #include "gromacs/pulling/output.h"
 122 #include "gromacs/pulling/pull.h"
 123 #include "gromacs/pulling/pull_rotation.h"
 124 #include "gromacs/restraint/manager.h"
 125 #include "gromacs/restraint/restraintmdmodule.h"
 126 #include "gromacs/restraint/restraintpotential.h"
 127 #include "gromacs/swap/swapcoords.h"
 128 #include "gromacs/taskassignment/decidegpuusage.h"
 129 #include "gromacs/taskassignment/resourcedivision.h"
 130 #include "gromacs/taskassignment/taskassignment.h"
 131 #include "gromacs/taskassignment/usergpuids.h"
 132 #include "gromacs/timing/gpu_timing.h"
 133 #include "gromacs/timing/wallcycle.h"
 134 #include "gromacs/timing/wallcyclereporting.h"
 135 #include "gromacs/topology/mtop_util.h"
 136 #include "gromacs/trajectory/trajectoryframe.h"
 137 #include "gromacs/utility/basenetwork.h"
 138 #include "gromacs/utility/cstringutil.h"
 139 #include "gromacs/utility/exceptions.h"
 140 #include "gromacs/utility/fatalerror.h"
 141 #include "gromacs/utility/filestream.h"
 142 #include "gromacs/utility/gmxassert.h"
 143 #include "gromacs/utility/gmxmpi.h"
 144 #include "gromacs/utility/keyvaluetree.h"
 145 #include "gromacs/utility/logger.h"
 146 #include "gromacs/utility/loggerbuilder.h"
 147 #include "gromacs/utility/mdmodulenotification.h"
 148 #include "gromacs/utility/physicalnodecommunicator.h"
 149 #include "gromacs/utility/pleasecite.h"
 150 #include "gromacs/utility/programcontext.h"
 151 #include "gromacs/utility/smalloc.h"
 152 #include "gromacs/utility/stringutil.h"
 153
 154 #include "isimulator.h"
 155 #include "replicaexchange.h"
 156 #include "simulatorbuilder.h"
 157
 158 #if GMX_FAHCORE
 159 #include "corewrap.h"
 160 #endif
 161
 162 namespace gmx
 163 {
 164
 165 /*! \brief environment variable to enable GPU P2P communication */
 166 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
 167     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
 168
 169 /*! \brief environment variable to enable GPU buffer operations */
 170 static const bool c_enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 171
 172 /*! \brief Manage any development feature flag variables encountered
 173  *
 174  * The use of dev features indicated by environment variables is
 175  * logged in order to ensure that runs with such featrues enabled can
 176  * be identified from their log and standard output. Any cross
 177  * dependencies are also checked, and if unsatisified, a fatal error
 178  * issued.
 179  *
 180  * \param[in]  mdlog        Logger object.
 181  */
 182 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
 183 {
 184     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 185     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
 186     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
 187
 188     if (enableGpuBufOps)
 189     {
 190         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 191                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
 192     }
 193
 194     if (enableGpuHaloExchange)
 195     {
 196         if (!enableGpuBufOps)
 197         {
 198             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
 199         }
 200     }
 201
 202     if (useGpuUpdateConstrain)
 203     {
 204         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 205                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
 206     }
 207
 208 }
 209
 210 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 211  *
 212  * Used to ensure that the master thread does not modify mdrunner during copy
 213  * on the spawned threads. */
 214 static void threadMpiMdrunnerAccessBarrier()
 215 {
 216 #if GMX_THREAD_MPI
 217     MPI_Barrier(MPI_COMM_WORLD);
 218 #endif
 219 }
 220
 221 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 222 {
 223     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 224
 225     // All runners in the same process share a restraint manager resource because it is
 226     // part of the interface to the client code, which is associated only with the
 227     // original thread. Handles to the same resources can be obtained by copy.
 228     {
 229         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 230     }
 231
 232     // Copy original cr pointer before master thread can pass the thread barrier
 233     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
 234
 235     // Copy members of master runner.
 236     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 237     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 238     newRunner.hw_opt    = hw_opt;
 239     newRunner.filenames = filenames;
 240
 241     newRunner.oenv                = oenv;
 242     newRunner.mdrunOptions        = mdrunOptions;
 243     newRunner.domdecOptions       = domdecOptions;
 244     newRunner.nbpu_opt            = nbpu_opt;
 245     newRunner.pme_opt             = pme_opt;
 246     newRunner.pme_fft_opt         = pme_fft_opt;
 247     newRunner.bonded_opt          = bonded_opt;
 248     newRunner.update_opt          = update_opt;
 249     newRunner.nstlist_cmdline     = nstlist_cmdline;
 250     newRunner.replExParams        = replExParams;
 251     newRunner.pforce              = pforce;
 252     newRunner.ms                  = ms;
 253     newRunner.startingBehavior    = startingBehavior;
 254     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 255
 256     threadMpiMdrunnerAccessBarrier();
 257
 258     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
 259
 260     return newRunner;
 261 }
 262
 263 /*! \brief The callback used for running on spawned threads.
 264  *
 265  * Obtains the pointer to the master mdrunner object from the one
 266  * argument permitted to the thread-launch API call, copies it to make
 267  * a new runner for this thread, reinitializes necessary data, and
 268  * proceeds to the simulation. */
 269 static void mdrunner_start_fn(const void *arg)
 270 {
 271     try
 272     {
 273         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 274         /* copy the arg list to make sure that it's thread-local. This
 275            doesn't copy pointed-to items, of course; fnm, cr and fplog
 276            are reset in the call below, all others should be const. */
 277         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 278         mdrunner.mdrunner();
 279     }
 280     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 281 }
 282
 283
 284 /*! \brief Start thread-MPI threads.
 285  *
 286  * Called by mdrunner() to start a specific number of threads
 287  * (including the main thread) for thread-parallel runs. This in turn
 288  * calls mdrunner() for each thread. All options are the same as for
 289  * mdrunner(). */
 290 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 291 {
 292
 293     /* first check whether we even need to start tMPI */
 294     if (numThreadsToLaunch < 2)
 295     {
 296         return cr;
 297     }
 298
 299 #if GMX_THREAD_MPI
 300     /* now spawn new threads that start mdrunner_start_fn(), while
 301        the main thread returns. Thread affinity is handled later. */
 302     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 303                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 304     {
 305         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 306     }
 307
 308     threadMpiMdrunnerAccessBarrier();
 309 #else
 310     GMX_UNUSED_VALUE(mdrunner_start_fn);
 311 #endif
 312
 313     return reinitialize_commrec_for_this_thread(cr);
 314 }
 315
 316 }  // namespace gmx
 317
 318 /*! \brief Initialize variables for Verlet scheme simulation */
 319 static void prepare_verlet_scheme(FILE                           *fplog,
 320                                   t_commrec                      *cr,
 321                                   t_inputrec                     *ir,
 322                                   int                             nstlist_cmdline,
 323                                   const gmx_mtop_t               *mtop,
 324                                   const matrix                    box,
 325                                   bool                            makeGpuPairList,
 326                                   const gmx::CpuInfo             &cpuinfo)
 327 {
 328     /* For NVE simulations, we will retain the initial list buffer */
 329     if (EI_DYNAMICS(ir->eI) &&
 330         ir->verletbuf_tol > 0 &&
 331         !(EI_MD(ir->eI) && ir->etc == etcNO))
 332     {
 333         /* Update the Verlet buffer size for the current run setup */
 334
 335         /* Here we assume SIMD-enabled kernels are being used. But as currently
 336          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 337          * and 4x2 gives a larger buffer than 4x4, this is ok.
 338          */
 339         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 340         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 341
 342         const real         rlist_new =
 343             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 344
 345         if (rlist_new != ir->rlist)
 346         {
 347             if (fplog != nullptr)
 348             {
 349                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 350                         ir->rlist, rlist_new,
 351                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 352             }
 353             ir->rlist     = rlist_new;
 354         }
 355     }
 356
 357     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 358     {
 359         gmx_fatal(FARGS, "Can not set nstlist without %s",
 360                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 361     }
 362
 363     if (EI_DYNAMICS(ir->eI))
 364     {
 365         /* Set or try nstlist values */
 366         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 367     }
 368 }
 369
 370 /*! \brief Override the nslist value in inputrec
 371  *
 372  * with value passed on the command line (if any)
 373  */
 374 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 375                                     int64_t              nsteps_cmdline,
 376                                     t_inputrec          *ir)
 377 {
 378     assert(ir);
 379
 380     /* override with anything else than the default -2 */
 381     if (nsteps_cmdline > -2)
 382     {
 383         char sbuf_steps[STEPSTRSIZE];
 384         char sbuf_msg[STRLEN];
 385
 386         ir->nsteps = nsteps_cmdline;
 387         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 388         {
 389             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 390                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 391                     fabs(nsteps_cmdline*ir->delta_t));
 392         }
 393         else
 394         {
 395             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 396                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 397         }
 398
 399         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 400     }
 401     else if (nsteps_cmdline < -2)
 402     {
 403         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 404                   nsteps_cmdline);
 405     }
 406     /* Do nothing if nsteps_cmdline == -2 */
 407 }
 408
 409 namespace gmx
 410 {
 411
 412 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 413  *
 414  * If not, and if a warning may be issued, logs a warning about
 415  * falling back to CPU code. With thread-MPI, only the first
 416  * call to this function should have \c issueWarning true. */
 417 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 418                                                const t_inputrec &ir,
 419                                                bool              issueWarning)
 420 {
 421     bool        gpuIsUseful = true;
 422     std::string warning;
 423
 424     if (ir.opts.ngener - ir.nwall > 1)
 425     {
 426         /* The GPU code does not support more than one energy group.
 427          * If the user requested GPUs explicitly, a fatal error is given later.
 428          */
 429         gpuIsUseful = false;
 430         warning     =
 431             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 432             "For better performance, run on the GPU without energy groups and then do "
 433             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 434     }
 435
 436     if (EI_TPI(ir.eI))
 437     {
 438         gpuIsUseful = false;
 439         warning     = "TPI is not implemented for GPUs.";
 440     }
 441
 442     if (!gpuIsUseful && issueWarning)
 443     {
 444         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 445     }
 446
 447     return gpuIsUseful;
 448 }
 449
 450 //! Initializes the logger for mdrun.
 451 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
 452 {
 453     gmx::LoggerBuilder builder;
 454     if (fplog != nullptr)
 455     {
 456         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 457     }
 458     if (cr == nullptr || SIMMASTER(cr))
 459     {
 460         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 461                                 &gmx::TextOutputFile::standardError());
 462     }
 463     return builder.build();
 464 }
 465
 466 //! Make a TaskTarget from an mdrun argument string.
 467 static TaskTarget findTaskTarget(const char *optionString)
 468 {
 469     TaskTarget returnValue = TaskTarget::Auto;
 470
 471     if (strncmp(optionString, "auto", 3) == 0)
 472     {
 473         returnValue = TaskTarget::Auto;
 474     }
 475     else if (strncmp(optionString, "cpu", 3) == 0)
 476     {
 477         returnValue = TaskTarget::Cpu;
 478     }
 479     else if (strncmp(optionString, "gpu", 3) == 0)
 480     {
 481         returnValue = TaskTarget::Gpu;
 482     }
 483     else
 484     {
 485         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 486     }
 487
 488     return returnValue;
 489 }
 490
 491 //! Finish run, aggregate data to print performance info.
 492 static void finish_run(FILE *fplog,
 493                        const gmx::MDLogger &mdlog,
 494                        const t_commrec *cr,
 495                        const t_inputrec *inputrec,
 496                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 497                        gmx_walltime_accounting_t walltime_accounting,
 498                        nonbonded_verlet_t *nbv,
 499                        const gmx_pme_t *pme,
 500                        gmx_bool bWriteStat)
 501 {
 502     double  delta_t  = 0;
 503     double  nbfs     = 0, mflop = 0;
 504     double  elapsed_time,
 505             elapsed_time_over_all_ranks,
 506             elapsed_time_over_all_threads,
 507             elapsed_time_over_all_threads_over_all_ranks;
 508     /* Control whether it is valid to print a report. Only the
 509        simulation master may print, but it should not do so if the run
 510        terminated e.g. before a scheduled reset step. This is
 511        complicated by the fact that PME ranks are unaware of the
 512        reason why they were sent a pmerecvqxFINISH. To avoid
 513        communication deadlocks, we always do the communication for the
 514        report, even if we've decided not to write the report, because
 515        how long it takes to finish the run is not important when we've
 516        decided not to report on the simulation performance.
 517
 518        Further, we only report performance for dynamical integrators,
 519        because those are the only ones for which we plan to
 520        consider doing any optimizations. */
 521     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 522
 523     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 524     {
 525         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 526         printReport = false;
 527     }
 528
 529     t_nrnb                  *nrnb_tot;
 530     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
 531     if (cr->nnodes > 1)
 532     {
 533         nrnbTotalStorage = std::make_unique<t_nrnb>();
 534         nrnb_tot         = nrnbTotalStorage.get();
 535 #if GMX_MPI
 536         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 537                       cr->mpi_comm_mysim);
 538 #endif
 539     }
 540     else
 541     {
 542         nrnb_tot = nrnb;
 543     }
 544
 545     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 546     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 547     if (cr->nnodes > 1)
 548     {
 549 #if GMX_MPI
 550         /* reduce elapsed_time over all MPI ranks in the current simulation */
 551         MPI_Allreduce(&elapsed_time,
 552                       &elapsed_time_over_all_ranks,
 553                       1, MPI_DOUBLE, MPI_SUM,
 554                       cr->mpi_comm_mysim);
 555         elapsed_time_over_all_ranks /= cr->nnodes;
 556         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 557          * current simulation. */
 558         MPI_Allreduce(&elapsed_time_over_all_threads,
 559                       &elapsed_time_over_all_threads_over_all_ranks,
 560                       1, MPI_DOUBLE, MPI_SUM,
 561                       cr->mpi_comm_mysim);
 562 #endif
 563     }
 564     else
 565     {
 566         elapsed_time_over_all_ranks                  = elapsed_time;
 567         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 568     }
 569
 570     if (printReport)
 571     {
 572         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 573     }
 574
 575     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 576     {
 577         print_dd_statistics(cr, inputrec, fplog);
 578     }
 579
 580     /* TODO Move the responsibility for any scaling by thread counts
 581      * to the code that handled the thread region, so that there's a
 582      * mechanism to keep cycle counting working during the transition
 583      * to task parallelism. */
 584     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 585     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 586     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 587     auto cycle_sum(wallcycle_sum(cr, wcycle));
 588
 589     if (printReport)
 590     {
 591         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 592         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 593
 594         if (pme_gpu_task_enabled(pme))
 595         {
 596             pme_gpu_get_timings(pme, &pme_gpu_timings);
 597         }
 598         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 599                         elapsed_time_over_all_ranks,
 600                         wcycle, cycle_sum,
 601                         nbnxn_gpu_timings,
 602                         &pme_gpu_timings);
 603
 604         if (EI_DYNAMICS(inputrec->eI))
 605         {
 606             delta_t = inputrec->delta_t;
 607         }
 608
 609         if (fplog)
 610         {
 611             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 612                        elapsed_time_over_all_ranks,
 613                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 614                        delta_t, nbfs, mflop);
 615         }
 616         if (bWriteStat)
 617         {
 618             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 619                        elapsed_time_over_all_ranks,
 620                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 621                        delta_t, nbfs, mflop);
 622         }
 623     }
 624 }
 625
 626 int Mdrunner::mdrunner()
 627 {
 628     matrix                    box;
 629     t_forcerec               *fr               = nullptr;
 630     t_fcdata                 *fcd              = nullptr;
 631     real                      ewaldcoeff_q     = 0;
 632     real                      ewaldcoeff_lj    = 0;
 633     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 634     gmx_wallcycle_t           wcycle;
 635     gmx_walltime_accounting_t walltime_accounting = nullptr;
 636     gmx_membed_t *            membed              = nullptr;
 637     gmx_hw_info_t            *hwinfo              = nullptr;
 638
 639     /* CAUTION: threads may be started later on in this function, so
 640        cr doesn't reflect the final parallel state right now */
 641     gmx_mtop_t                      mtop;
 642
 643     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 644     bool doRerun  = mdrunOptions.rerun;
 645
 646     // Handle task-assignment related user options.
 647     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 648                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 649
 650     std::vector<int> userGpuTaskAssignment;
 651     try
 652     {
 653         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 654     }
 655     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 656     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 657     auto       pmeTarget       = findTaskTarget(pme_opt);
 658     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 659     auto       bondedTarget    = findTaskTarget(bonded_opt);
 660     auto       updateTarget    = findTaskTarget(update_opt);
 661     PmeRunMode pmeRunMode      = PmeRunMode::None;
 662
 663     // Here we assume that SIMMASTER(cr) does not change even after the
 664     // threads are started.
 665
 666     FILE *fplog = nullptr;
 667     // If we are appending, we don't write log output because we need
 668     // to check that the old log file matches what the checkpoint file
 669     // expects. Otherwise, we should start to write log output now if
 670     // there is a file ready for it.
 671     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 672     {
 673         fplog = gmx_fio_getfp(logFileHandle);
 674     }
 675     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
 676     gmx::MDLogger    mdlog(logOwner.logger());
 677
 678     // report any development features that may be enabled by environment variables
 679     manageDevelopmentFeatures(mdlog);
 680
 681     // With thread-MPI, the communicator changes after threads are
 682     // launched, so this is rebuilt for the master rank at that
 683     // time. The non-master ranks are fine to keep the one made here.
 684     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 685     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 686
 687     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
 688
 689     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 690
 691     // Print citation requests after all software/hardware printing
 692     pleaseCiteGromacs(fplog);
 693
 694     // TODO Replace this by unique_ptr once t_inputrec is C++
 695     t_inputrec               inputrecInstance;
 696     t_inputrec              *inputrec = nullptr;
 697     std::unique_ptr<t_state> globalState;
 698
 699     if (SIMMASTER(cr))
 700     {
 701         /* Only the master rank has the global state */
 702         globalState = std::make_unique<t_state>();
 703
 704         /* Read (nearly) all data required for the simulation */
 705         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
 706                        &inputrecInstance, globalState.get(), &mtop);
 707         inputrec = &inputrecInstance;
 708     }
 709
 710     /* Check and update the hardware options for internal consistency */
 711     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec);
 712
 713     if (GMX_THREAD_MPI && SIMMASTER(cr))
 714     {
 715         bool useGpuForNonbonded = false;
 716         bool useGpuForPme       = false;
 717         try
 718         {
 719             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 720
 721             // If the user specified the number of ranks, then we must
 722             // respect that, but in default mode, we need to allow for
 723             // the number of GPUs to choose the number of ranks.
 724             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 725             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 726                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 727                     canUseGpuForNonbonded,
 728                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 729                     hw_opt.nthreads_tmpi);
 730             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 731                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 732                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 733
 734         }
 735         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 736
 737         /* Determine how many thread-MPI ranks to start.
 738          *
 739          * TODO Over-writing the user-supplied value here does
 740          * prevent any possible subsequent checks from working
 741          * correctly. */
 742         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 743                                                 &hw_opt,
 744                                                 gpuIdsToUse,
 745                                                 useGpuForNonbonded,
 746                                                 useGpuForPme,
 747                                                 inputrec, &mtop,
 748                                                 mdlog,
 749                                                 doMembed);
 750
 751         // Now start the threads for thread MPI.
 752         cr = spawnThreads(hw_opt.nthreads_tmpi);
 753         /* The main thread continues here with a new cr. We don't deallocate
 754            the old cr because other threads may still be reading it. */
 755         // TODO Both master and spawned threads call dup_tfn and
 756         // reinitialize_commrec_for_this_thread. Find a way to express
 757         // this better.
 758         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 759     }
 760     // END OF CAUTION: cr and physicalNodeComm are now reliable
 761
 762     if (PAR(cr))
 763     {
 764         /* now broadcast everything to the non-master nodes/threads: */
 765         if (!SIMMASTER(cr))
 766         {
 767             inputrec = &inputrecInstance;
 768         }
 769         init_parallel(cr, inputrec, &mtop);
 770     }
 771     GMX_RELEASE_ASSERT(inputrec != nullptr, "All range should have a valid inputrec now");
 772
 773     // Now each rank knows the inputrec that SIMMASTER read and used,
 774     // and (if applicable) cr->nnodes has been assigned the number of
 775     // thread-MPI ranks that have been chosen. The ranks can now all
 776     // run the task-deciding functions and will agree on the result
 777     // without needing to communicate.
 778     //
 779     // TODO Should we do the communication in debug mode to support
 780     // having an assertion?
 781     //
 782     // Note that these variables describe only their own node.
 783     //
 784     // Note that when bonded interactions run on a GPU they always run
 785     // alongside a nonbonded task, so do not influence task assignment
 786     // even though they affect the force calculation workload.
 787     bool useGpuForNonbonded     = false;
 788     bool useGpuForPme           = false;
 789     bool useGpuForBonded        = false;
 790     bool useGpuForUpdate        = false;
 791     bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
 792     try
 793     {
 794         // It's possible that there are different numbers of GPUs on
 795         // different nodes, which is the user's responsibilty to
 796         // handle. If unsuitable, we will notice that during task
 797         // assignment.
 798         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 799         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 800                                                                 emulateGpuNonbonded,
 801                                                                 canUseGpuForNonbonded,
 802                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
 803                                                                 gpusWereDetected);
 804         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 805                                                     *hwinfo, *inputrec, mtop,
 806                                                     cr->nnodes, domdecOptions.numPmeRanks,
 807                                                     gpusWereDetected);
 808         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 809         useGpuForBonded =
 810             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 811                                             bondedTarget, canUseGpuForBonded,
 812                                             EVDW_PME(inputrec->vdwtype),
 813                                             EEL_PME_EWALD(inputrec->coulombtype),
 814                                             domdecOptions.numPmeRanks, gpusWereDetected);
 815
 816         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 817         if (pmeRunMode == PmeRunMode::GPU)
 818         {
 819             if (pmeFftTarget == TaskTarget::Cpu)
 820             {
 821                 pmeRunMode = PmeRunMode::Mixed;
 822             }
 823         }
 824         else if (pmeFftTarget == TaskTarget::Gpu)
 825         {
 826             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 827         }
 828     }
 829     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 830
 831     // Build restraints.
 832     // TODO: hide restraint implementation details from Mdrunner.
 833     // There is nothing unique about restraints at this point as far as the
 834     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 835     // factory functions from the SimulationContext on which to call mdModules_->add().
 836     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 837     for (auto && restraint : restraintManager_->getRestraints())
 838     {
 839         auto module = RestraintMDModule::create(restraint,
 840                                                 restraint->sites());
 841         mdModules_->add(std::move(module));
 842     }
 843
 844     // TODO: Error handling
 845     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 846     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
 847
 848     if (inputrec->internalParameters != nullptr)
 849     {
 850         mdModulesNotifier.notify(*inputrec->internalParameters);
 851     }
 852
 853     if (fplog != nullptr)
 854     {
 855         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 856         fprintf(fplog, "\n");
 857     }
 858
 859     if (SIMMASTER(cr))
 860     {
 861         /* In rerun, set velocities to zero if present */
 862         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 863         {
 864             // rerun does not use velocities
 865             GMX_LOG(mdlog.info).asParagraph().appendText(
 866                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 867                     "potential energy and forces. The velocities will be ignored.");
 868             for (int i = 0; i < globalState->natoms; i++)
 869             {
 870                 clear_rvec(globalState->v[i]);
 871             }
 872             globalState->flags &= ~(1 << estV);
 873         }
 874
 875         /* now make sure the state is initialized and propagated */
 876         set_state_entries(globalState.get(), inputrec);
 877     }
 878
 879     /* NM and TPI parallelize over force/energy calculations, not atoms,
 880      * so we need to initialize and broadcast the global state.
 881      */
 882     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 883     {
 884         if (!MASTER(cr))
 885         {
 886             globalState = std::make_unique<t_state>();
 887         }
 888         broadcastStateWithoutDynamics(cr, globalState.get());
 889     }
 890
 891     /* A parallel command line option consistency check that we can
 892        only do after any threads have started. */
 893     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 894                      domdecOptions.numCells[YY] > 1 ||
 895                      domdecOptions.numCells[ZZ] > 1 ||
 896                      domdecOptions.numPmeRanks > 0))
 897     {
 898         gmx_fatal(FARGS,
 899                   "The -dd or -npme option request a parallel simulation, "
 900 #if !GMX_MPI
 901                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 902 #else
 903 #if GMX_THREAD_MPI
 904                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 905 #else
 906                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 907 #endif
 908 #endif
 909     }
 910
 911     if (doRerun &&
 912         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 913     {
 914         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 915     }
 916
 917     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 918     {
 919         if (domdecOptions.numPmeRanks > 0)
 920         {
 921             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 922                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 923         }
 924
 925         domdecOptions.numPmeRanks = 0;
 926     }
 927
 928     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 929     {
 930         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 931          * improve performance with many threads per GPU, since our OpenMP
 932          * scaling is bad, but it's difficult to automate the setup.
 933          */
 934         domdecOptions.numPmeRanks = 0;
 935     }
 936     if (useGpuForPme)
 937     {
 938         if (domdecOptions.numPmeRanks < 0)
 939         {
 940             domdecOptions.numPmeRanks = 0;
 941             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 942         }
 943         else
 944         {
 945             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 946         }
 947     }
 948
 949 #if GMX_FAHCORE
 950     if (MASTER(cr))
 951     {
 952         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 953     }
 954 #endif
 955
 956     /* NMR restraints must be initialized before load_checkpoint,
 957      * since with time averaging the history is added to t_state.
 958      * For proper consistency check we therefore need to extend
 959      * t_state here.
 960      * So the PME-only nodes (if present) will also initialize
 961      * the distance restraints.
 962      */
 963     snew(fcd, 1);
 964
 965     /* This needs to be called before read_checkpoint to extend the state */
 966     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 967
 968     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 969
 970     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 971
 972     ObservablesHistory   observablesHistory = {};
 973
 974     if (startingBehavior != StartingBehavior::NewSimulation)
 975     {
 976         /* Check if checkpoint file exists before doing continuation.
 977          * This way we can use identical input options for the first and subsequent runs...
 978          */
 979         if (mdrunOptions.numStepsCommandline > -2)
 980         {
 981             /* Temporarily set the number of steps to unmlimited to avoid
 982              * triggering the nsteps check in load_checkpoint().
 983              * This hack will go away soon when the -nsteps option is removed.
 984              */
 985             inputrec->nsteps = -1;
 986         }
 987
 988         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 989                         logFileHandle,
 990                         cr, domdecOptions.numCells,
 991                         inputrec, globalState.get(),
 992                         &observablesHistory,
 993                         mdrunOptions.reproducible, mdModules_->notifier());
 994
 995         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 996         {
 997             // Now we can start normal logging to the truncated log file.
 998             fplog    = gmx_fio_getfp(logFileHandle);
 999             prepareLogAppending(fplog);
1000             logOwner = buildLogger(fplog, cr);
1001             mdlog    = logOwner.logger();
1002         }
1003     }
1004
1005     if (mdrunOptions.numStepsCommandline > -2)
1006     {
1007         GMX_LOG(mdlog.info).asParagraph().
1008             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1009                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1010     }
1011     /* override nsteps with value set on the commamdline */
1012     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1013
1014     if (SIMMASTER(cr))
1015     {
1016         copy_mat(globalState->box, box);
1017     }
1018
1019     if (PAR(cr))
1020     {
1021         gmx_bcast(sizeof(box), box, cr);
1022     }
1023
1024     if (inputrec->cutoff_scheme != ecutsVERLET)
1025     {
1026         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1027     }
1028     /* Update rlist and nstlist. */
1029     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1030                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1031
1032     const bool          prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
1033     LocalAtomSetManager atomSets;
1034     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1035                      inputrec->eI == eiNM))
1036     {
1037         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
1038                                            prefer1DAnd1PulseDD,
1039                                            &mtop, inputrec,
1040                                            box, positionsFromStatePointer(globalState.get()),
1041                                            &atomSets);
1042         // Note that local state still does not exist yet.
1043     }
1044     else
1045     {
1046         /* PME, if used, is done on all nodes with 1D decomposition */
1047         cr->npmenodes = 0;
1048         cr->duty      = (DUTY_PP | DUTY_PME);
1049
1050         if (inputrec->ePBC == epbcSCREW)
1051         {
1052             gmx_fatal(FARGS,
1053                       "pbc=screw is only implemented with domain decomposition");
1054         }
1055     }
1056
1057     if (PAR(cr))
1058     {
1059         /* After possible communicator splitting in make_dd_communicators.
1060          * we can set up the intra/inter node communication.
1061          */
1062         gmx_setup_nodecomm(fplog, cr);
1063     }
1064
1065 #if GMX_MPI
1066     if (isMultiSim(ms))
1067     {
1068         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1069                 "This is simulation %d out of %d running as a composite GROMACS\n"
1070                 "multi-simulation job. Setup for this simulation:\n",
1071                 ms->sim, ms->nsim);
1072     }
1073     GMX_LOG(mdlog.warning).appendTextFormatted(
1074             "Using %d MPI %s\n",
1075             cr->nnodes,
1076 #if GMX_THREAD_MPI
1077             cr->nnodes == 1 ? "thread" : "threads"
1078 #else
1079             cr->nnodes == 1 ? "process" : "processes"
1080 #endif
1081             );
1082     fflush(stderr);
1083 #endif
1084
1085     // If mdrun -pin auto honors any affinity setting that already
1086     // exists. If so, it is nice to provide feedback about whether
1087     // that existing affinity setting was from OpenMP or something
1088     // else, so we run this code both before and after we initialize
1089     // the OpenMP support.
1090     gmx_check_thread_affinity_set(mdlog,
1091                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1092     /* Check and update the number of OpenMP threads requested */
1093     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1094                                             pmeRunMode, mtop, *inputrec);
1095
1096     gmx_omp_nthreads_init(mdlog, cr,
1097                           hwinfo->nthreads_hw_avail,
1098                           physicalNodeComm.size_,
1099                           hw_opt.nthreads_omp,
1100                           hw_opt.nthreads_omp_pme,
1101                           !thisRankHasDuty(cr, DUTY_PP));
1102
1103     // Enable FP exception detection, but not in
1104     // Release mode and not for compilers with known buggy FP
1105     // exception support (clang with any optimization) or suspected
1106     // buggy FP exception support (gcc 7.* with optimization).
1107 #if !defined NDEBUG && \
1108     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1109     && defined __OPTIMIZE__)
1110     const bool bEnableFPE = true;
1111 #else
1112     const bool bEnableFPE = false;
1113 #endif
1114     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1115     if (bEnableFPE)
1116     {
1117         gmx_feenableexcept();
1118     }
1119
1120     // Build a data structure that expresses which kinds of non-bonded
1121     // task are handled by this rank.
1122     //
1123     // TODO Later, this might become a loop over all registered modules
1124     // relevant to the mdp inputs, to find those that have such tasks.
1125     //
1126     // TODO This could move before init_domain_decomposition() as part
1127     // of refactoring that separates the responsibility for duty
1128     // assignment from setup for communication between tasks, and
1129     // setup for tasks handled with a domain (ie including short-ranged
1130     // tasks, bonded tasks, etc.).
1131     //
1132     // Note that in general useGpuForNonbonded, etc. can have a value
1133     // that is inconsistent with the presence of actual GPUs on any
1134     // rank, and that is not known to be a problem until the
1135     // duty of the ranks on a node become known.
1136     //
1137     // TODO Later we might need the concept of computeTasksOnThisRank,
1138     // from which we construct gpuTasksOnThisRank.
1139     //
1140     // Currently the DD code assigns duty to ranks that can
1141     // include PP work that currently can be executed on a single
1142     // GPU, if present and compatible.  This has to be coordinated
1143     // across PP ranks on a node, with possible multiple devices
1144     // or sharing devices on a node, either from the user
1145     // selection, or automatically.
1146     auto                 haveGpus = !gpuIdsToUse.empty();
1147     std::vector<GpuTask> gpuTasksOnThisRank;
1148     if (thisRankHasDuty(cr, DUTY_PP))
1149     {
1150         if (useGpuForNonbonded)
1151         {
1152             // Note that any bonded tasks on a GPU always accompany a
1153             // non-bonded task.
1154             if (haveGpus)
1155             {
1156                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
1157             }
1158             else if (nonbondedTarget == TaskTarget::Gpu)
1159             {
1160                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
1161             }
1162             else if (bondedTarget == TaskTarget::Gpu)
1163             {
1164                 gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
1165             }
1166         }
1167     }
1168     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
1169     if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
1170     {
1171         if (useGpuForPme)
1172         {
1173             if (haveGpus)
1174             {
1175                 gpuTasksOnThisRank.push_back(GpuTask::Pme);
1176             }
1177             else if (pmeTarget == TaskTarget::Gpu)
1178             {
1179                 gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
1180             }
1181         }
1182     }
1183
1184     GpuTaskAssignment gpuTaskAssignment;
1185     try
1186     {
1187         // Produce the task assignment for this rank.
1188         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
1189                                               mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
1190                                               useGpuForBonded, pmeRunMode);
1191     }
1192     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1193
1194     /* Prevent other ranks from continuing after an issue was found
1195      * and reported as a fatal error.
1196      *
1197      * TODO This function implements a barrier so that MPI runtimes
1198      * can organize an orderly shutdown if one of the ranks has had to
1199      * issue a fatal error in various code already run. When we have
1200      * MPI-aware error handling and reporting, this should be
1201      * improved. */
1202 #if GMX_MPI
1203     if (PAR(cr))
1204     {
1205         MPI_Barrier(cr->mpi_comm_mysim);
1206     }
1207     if (isMultiSim(ms))
1208     {
1209         if (SIMMASTER(cr))
1210         {
1211             MPI_Barrier(ms->mpi_comm_masters);
1212         }
1213         /* We need another barrier to prevent non-master ranks from contiuing
1214          * when an error occured in a different simulation.
1215          */
1216         MPI_Barrier(cr->mpi_comm_mysim);
1217     }
1218 #endif
1219
1220     /* Now that we know the setup is consistent, check for efficiency */
1221     check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
1222                                        cr, mdlog);
1223
1224     gmx_device_info_t *nonbondedDeviceInfo = nullptr;
1225
1226     if (thisRankHasDuty(cr, DUTY_PP))
1227     {
1228         // This works because only one task of each type is currently permitted.
1229         auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
1230                                              hasTaskType<GpuTask::Nonbonded>);
1231         if (nbGpuTaskMapping != gpuTaskAssignment.end())
1232         {
1233             int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
1234             nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
1235             init_gpu(nonbondedDeviceInfo);
1236
1237             if (DOMAINDECOMP(cr))
1238             {
1239                 /* When we share GPUs over ranks, we need to know this for the DLB */
1240                 dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
1241             }
1242
1243         }
1244     }
1245
1246     gmx_device_info_t                *pmeDeviceInfo = nullptr;
1247     // Later, this program could contain kernels that might be later
1248     // re-used as auto-tuning progresses, or subsequent simulations
1249     // are invoked.
1250     PmeGpuProgramStorage pmeGpuProgram;
1251     // This works because only one task of each type is currently permitted.
1252     auto                 pmeGpuTaskMapping     = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
1253     const bool           thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
1254     if (thisRankHasPmeGpuTask)
1255     {
1256         pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
1257         init_gpu(pmeDeviceInfo);
1258         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1259     }
1260
1261     /* getting number of PP/PME threads on this MPI / tMPI rank.
1262        PME: env variable should be read only on one node to make sure it is
1263        identical everywhere;
1264      */
1265     const int numThreadsOnThisRank =
1266         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1267     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1268                                   *hwinfo->hardwareTopology,
1269                                   physicalNodeComm, mdlog);
1270
1271     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1272     {
1273         /* Before setting affinity, check whether the affinity has changed
1274          * - which indicates that probably the OpenMP library has changed it
1275          * since we first checked).
1276          */
1277         gmx_check_thread_affinity_set(mdlog,
1278                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1279
1280         int numThreadsOnThisNode, intraNodeThreadOffset;
1281         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1282                                  &intraNodeThreadOffset);
1283
1284         /* Set the CPU affinity */
1285         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1286                                 numThreadsOnThisRank, numThreadsOnThisNode,
1287                                 intraNodeThreadOffset, nullptr);
1288     }
1289
1290     if (mdrunOptions.timingOptions.resetStep > -1)
1291     {
1292         GMX_LOG(mdlog.info).asParagraph().
1293             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1294     }
1295     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1296
1297     if (PAR(cr))
1298     {
1299         /* Master synchronizes its value of reset_counters with all nodes
1300          * including PME only nodes */
1301         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1302         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1303         wcycle_set_reset_counters(wcycle, reset_counters);
1304     }
1305
1306     // Membrane embedding must be initialized before we call init_forcerec()
1307     if (doMembed)
1308     {
1309         if (MASTER(cr))
1310         {
1311             fprintf(stderr, "Initializing membed");
1312         }
1313         /* Note that membed cannot work in parallel because mtop is
1314          * changed here. Fix this if we ever want to make it run with
1315          * multiple ranks. */
1316         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1317                              &mdrunOptions
1318                                  .checkpointOptions.period);
1319     }
1320
1321     std::unique_ptr<MDAtoms>     mdAtoms;
1322     std::unique_ptr<gmx_vsite_t> vsite;
1323
1324     t_nrnb nrnb;
1325     if (thisRankHasDuty(cr, DUTY_PP))
1326     {
1327         mdModulesNotifier.notify(*cr);
1328         mdModulesNotifier.notify(&atomSets);
1329         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1330         /* Initiate forcerecord */
1331         fr                 = new t_forcerec;
1332         fr->forceProviders = mdModules_->initForceProviders();
1333         init_forcerec(fplog, mdlog, fr, fcd,
1334                       inputrec, &mtop, cr, box,
1335                       opt2fn("-table", filenames.size(), filenames.data()),
1336                       opt2fn("-tablep", filenames.size(), filenames.data()),
1337                       opt2fns("-tableb", filenames.size(), filenames.data()),
1338                       *hwinfo, nonbondedDeviceInfo,
1339                       useGpuForBonded,
1340                       pforce,
1341                       wcycle);
1342
1343         // TODO Move this to happen during domain decomposition setup,
1344         // once stream and event handling works well with that.
1345         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
1346         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
1347         {
1348             GMX_RELEASE_ASSERT(c_enableGpuBufOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
1349             void *streamLocal                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1350             void *streamNonLocal                =
1351                 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1352             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1353             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1354                     "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
1355             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, streamLocal,
1356                                                                         streamNonLocal, coordinatesOnDeviceEvent);
1357         }
1358
1359         /* Initialize the mdAtoms structure.
1360          * mdAtoms is not filled with atom data,
1361          * as this can not be done now with domain decomposition.
1362          */
1363         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1364         if (globalState && thisRankHasPmeGpuTask)
1365         {
1366             // The pinning of coordinates in the global state object works, because we only use
1367             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1368             // points to the global state object without DD.
1369             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1370             // which should also perform the pinning.
1371             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1372         }
1373
1374         /* Initialize the virtual site communication */
1375         vsite = initVsite(mtop, cr);
1376
1377         calc_shifts(box, fr->shift_vec);
1378
1379         /* With periodic molecules the charge groups should be whole at start up
1380          * and the virtual sites should not be far from their proper positions.
1381          */
1382         if (!inputrec->bContinuation && MASTER(cr) &&
1383             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1384         {
1385             /* Make molecules whole at start of run */
1386             if (fr->ePBC != epbcNONE)
1387             {
1388                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1389             }
1390             if (vsite)
1391             {
1392                 /* Correct initial vsite positions are required
1393                  * for the initial distribution in the domain decomposition
1394                  * and for the initial shell prediction.
1395                  */
1396                 constructVsitesGlobal(mtop, globalState->x);
1397             }
1398         }
1399
1400         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1401         {
1402             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1403             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1404         }
1405     }
1406     else
1407     {
1408         /* This is a PME only node */
1409
1410         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1411
1412         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1413         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1414     }
1415
1416     gmx_pme_t *sepPmeData = nullptr;
1417     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1418     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1419     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1420
1421     /* Initiate PME if necessary,
1422      * either on all nodes or on dedicated PME nodes only. */
1423     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1424     {
1425         if (mdAtoms && mdAtoms->mdatoms())
1426         {
1427             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1428             if (EVDW_PME(inputrec->vdwtype))
1429             {
1430                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1431             }
1432         }
1433         if (cr->npmenodes > 0)
1434         {
1435             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1436             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1437             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1438         }
1439
1440         if (thisRankHasDuty(cr, DUTY_PME))
1441         {
1442             try
1443             {
1444                 pmedata = gmx_pme_init(cr,
1445                                        getNumPmeDomains(cr->dd),
1446                                        inputrec,
1447                                        nChargePerturbed != 0, nTypePerturbed != 0,
1448                                        mdrunOptions.reproducible,
1449                                        ewaldcoeff_q, ewaldcoeff_lj,
1450                                        gmx_omp_nthreads_get(emntPME),
1451                                        pmeRunMode, nullptr,
1452                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1453             }
1454             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1455         }
1456     }
1457
1458
1459     if (EI_DYNAMICS(inputrec->eI))
1460     {
1461         /* Turn on signal handling on all nodes */
1462         /*
1463          * (A user signal from the PME nodes (if any)
1464          * is communicated to the PP nodes.
1465          */
1466         signal_handler_install();
1467     }
1468
1469     pull_t *pull_work = nullptr;
1470     if (thisRankHasDuty(cr, DUTY_PP))
1471     {
1472         /* Assumes uniform use of the number of OpenMP threads */
1473         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1474
1475         if (inputrec->bPull)
1476         {
1477             /* Initialize pull code */
1478             pull_work =
1479                 init_pull(fplog, inputrec->pull, inputrec,
1480                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1481             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1482             {
1483                 initPullHistory(pull_work, &observablesHistory);
1484             }
1485             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1486             {
1487                 init_pull_output_files(pull_work,
1488                                        filenames.size(), filenames.data(), oenv,
1489                                        startingBehavior);
1490             }
1491         }
1492
1493         std::unique_ptr<EnforcedRotation> enforcedRotation;
1494         if (inputrec->bRot)
1495         {
1496             /* Initialize enforced rotation code */
1497             enforcedRotation = init_rot(fplog,
1498                                         inputrec,
1499                                         filenames.size(),
1500                                         filenames.data(),
1501                                         cr,
1502                                         &atomSets,
1503                                         globalState.get(),
1504                                         &mtop,
1505                                         oenv,
1506                                         mdrunOptions,
1507                                         startingBehavior);
1508         }
1509
1510         t_swap *swap = nullptr;
1511         if (inputrec->eSwapCoords != eswapNO)
1512         {
1513             /* Initialize ion swapping code */
1514             swap = init_swapcoords(fplog, inputrec,
1515                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1516                                    &mtop, globalState.get(), &observablesHistory,
1517                                    cr, &atomSets, oenv, mdrunOptions,
1518                                    startingBehavior);
1519         }
1520
1521         /* Let makeConstraints know whether we have essential dynamics constraints.
1522          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1523          */
1524         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1525                                     || observablesHistory.edsamHistory);
1526         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1527                                                    fplog, *mdAtoms->mdatoms(),
1528                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1529
1530         /* Energy terms and groups */
1531         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1532
1533         /* Kinetic energy data */
1534         gmx_ekindata_t ekind;
1535         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1536
1537         /* Set up interactive MD (IMD) */
1538         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1539                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1540                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1541                                          startingBehavior);
1542
1543         if (DOMAINDECOMP(cr))
1544         {
1545             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1546             /* This call is not included in init_domain_decomposition mainly
1547              * because fr->cginfo_mb is set later.
1548              */
1549             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1550                             domdecOptions.checkBondedInteractions,
1551                             fr->cginfo_mb);
1552         }
1553
1554         if (updateTarget == TaskTarget::Gpu)
1555         {
1556             if (SIMMASTER(cr))
1557             {
1558                 gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
1559                           "of update and constraints to the GPU!");
1560             }
1561         }
1562
1563         // Before we start the actual simulator, try if we can run the update task on the GPU.
1564         useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
1565                                                          useGpuForNonbonded,
1566                                                          updateTarget,
1567                                                          gpusWereDetected,
1568                                                          *inputrec,
1569                                                          *mdAtoms,
1570                                                          doEssentialDynamics,
1571                                                          fcd->orires.nr != 0,
1572                                                          fcd->disres.nsystems != 0);
1573
1574         // TODO This is not the right place to manage the lifetime of
1575         // this data structure, but currently it's the easiest way to
1576         // make it work.
1577         MdrunScheduleWorkload runScheduleWork;
1578
1579         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1580         SimulatorBuilder simulatorBuilder;
1581
1582         // build and run simulator object based on user-input
1583         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1584                     false,
1585                     inputrec, doRerun, vsite.get(), ms, replExParams,
1586                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1587                     &observablesHistory, membed);
1588         auto simulator = simulatorBuilder.build(
1589                     inputIsCompatibleWithModularSimulator,
1590                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1591                     oenv,
1592                     mdrunOptions,
1593                     startingBehavior,
1594                     vsite.get(), constr.get(),
1595                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1596                     deform.get(),
1597                     mdModules_->outputProvider(),
1598                     mdModules_->notifier(),
1599                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1600                     fcd,
1601                     globalState.get(),
1602                     &observablesHistory,
1603                     mdAtoms.get(), &nrnb, wcycle, fr,
1604                     &enerd,
1605                     &ekind,
1606                     &runScheduleWork,
1607                     replExParams,
1608                     membed,
1609                     walltime_accounting,
1610                     std::move(stopHandlerBuilder_),
1611                     doRerun,
1612                     useGpuForUpdate);
1613         simulator->run();
1614
1615         if (inputrec->bPull)
1616         {
1617             finish_pull(pull_work);
1618         }
1619         finish_swapcoords(swap);
1620     }
1621     else
1622     {
1623         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1624         /* do PME only */
1625         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1626         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1627     }
1628
1629     wallcycle_stop(wcycle, ewcRUN);
1630
1631     /* Finish up, write some stuff
1632      * if rerunMD, don't write last frame again
1633      */
1634     finish_run(fplog, mdlog, cr,
1635                inputrec, &nrnb, wcycle, walltime_accounting,
1636                fr ? fr->nbv.get() : nullptr,
1637                pmedata,
1638                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1639
1640     // clean up cycle counter
1641     wallcycle_destroy(wcycle);
1642
1643 // Free PME data
1644     if (pmedata)
1645     {
1646         gmx_pme_destroy(pmedata);
1647         pmedata = nullptr;
1648     }
1649
1650     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1651     // before we destroy the GPU context(s) in free_gpu_resources().
1652     // Pinned buffers are associated with contexts in CUDA.
1653     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1654     mdAtoms.reset(nullptr);
1655     globalState.reset(nullptr);
1656     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1657
1658     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1659     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1660     free_gpu(nonbondedDeviceInfo);
1661     free_gpu(pmeDeviceInfo);
1662     done_forcerec(fr, mtop.molblock.size());
1663     sfree(fcd);
1664
1665     if (doMembed)
1666     {
1667         free_membed(membed);
1668     }
1669
1670     /* Does what it says */
1671     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1672     walltime_accounting_destroy(walltime_accounting);
1673
1674     // Ensure log file content is written
1675     if (logFileHandle)
1676     {
1677         gmx_fio_flush(logFileHandle);
1678     }
1679
1680     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1681      * exceptions were enabled before function was called. */
1682     if (bEnableFPE)
1683     {
1684         gmx_fedisableexcept();
1685     }
1686
1687     auto rc = static_cast<int>(gmx_get_stop_condition());
1688
1689 #if GMX_THREAD_MPI
1690     /* we need to join all threads. The sub-threads join when they
1691        exit this function, but the master thread needs to be told to
1692        wait for that. */
1693     if (PAR(cr) && MASTER(cr))
1694     {
1695         tMPI_Finalize();
1696     }
1697 #endif
1698     return rc;
1699 }
1700
1701 Mdrunner::~Mdrunner()
1702 {
1703     // Clean up of the Manager.
1704     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1705     // but okay as long as threads synchronize some time before adding or accessing
1706     // a new set of restraints.
1707     if (restraintManager_)
1708     {
1709         restraintManager_->clear();
1710         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1711                    "restraints added during runner life time should be cleared at runner destruction.");
1712     }
1713 };
1714
1715 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1716                             const std::string                        &name)
1717 {
1718     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1719     // Not sure if this should be logged through the md logger or something else,
1720     // but it is helpful to have some sort of INFO level message sent somewhere.
1721     //    std::cout << "Registering restraint named " << name << std::endl;
1722
1723     // When multiple restraints are used, it may be wasteful to register them separately.
1724     // Maybe instead register an entire Restraint Manager as a force provider.
1725     restraintManager_->addToSpec(std::move(puller),
1726                                  name);
1727 }
1728
1729 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1730     : mdModules_(std::move(mdModules))
1731 {
1732 }
1733
1734 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1735
1736 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1737 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1738
1739 class Mdrunner::BuilderImplementation
1740 {
1741     public:
1742         BuilderImplementation() = delete;
1743         BuilderImplementation(std::unique_ptr<MDModules> mdModules);
1744         ~BuilderImplementation();
1745
1746         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1747                                                     real                forceWarningThreshold,
1748                                                     StartingBehavior    startingBehavior);
1749
1750         void addDomdec(const DomdecOptions &options);
1751
1752         void addVerletList(int nstlist);
1753
1754         void addReplicaExchange(const ReplicaExchangeParameters &params);
1755
1756         void addMultiSim(gmx_multisim_t* multisim);
1757
1758         void addCommunicationRecord(t_commrec *cr);
1759
1760         void addNonBonded(const char* nbpu_opt);
1761
1762         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1763
1764         void addBondedTaskAssignment(const char* bonded_opt);
1765
1766         void addUpdateTaskAssignment(const char* update_opt);
1767
1768         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1769
1770         void addFilenames(ArrayRef <const t_filenm> filenames);
1771
1772         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1773
1774         void addLogFile(t_fileio *logFileHandle);
1775
1776         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1777
1778         Mdrunner build();
1779
1780     private:
1781
1782         // Default parameters copied from runner.h
1783         // \todo Clarify source(s) of default parameters.
1784
1785         const char* nbpu_opt_          = nullptr;
1786         const char* pme_opt_           = nullptr;
1787         const char* pme_fft_opt_       = nullptr;
1788         const char *bonded_opt_        = nullptr;
1789         const char *update_opt_        = nullptr;
1790
1791         MdrunOptions                          mdrunOptions_;
1792
1793         DomdecOptions                         domdecOptions_;
1794
1795         ReplicaExchangeParameters             replicaExchangeParameters_;
1796
1797         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1798         int         nstlist_ = 0;
1799
1800         //! Multisim communicator handle.
1801         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1802
1803         //! Non-owning communication record (only used when building command-line mdrun)
1804         t_commrec *communicationRecord_ = nullptr;
1805
1806         //! Print a warning if any force is larger than this (in kJ/mol nm).
1807         real forceWarningThreshold_ = -1;
1808
1809         //! Whether the simulation will start afresh, or restart with/without appending.
1810         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1811
1812         //! The modules that comprise the functionality of mdrun.
1813         std::unique_ptr<MDModules> mdModules_;
1814
1815         //! \brief Parallelism information.
1816         gmx_hw_opt_t hardwareOptions_;
1817
1818         //! filename options for simulation.
1819         ArrayRef<const t_filenm> filenames_;
1820
1821         /*! \brief Handle to output environment.
1822          *
1823          * \todo gmx_output_env_t needs lifetime management.
1824          */
1825         gmx_output_env_t*    outputEnvironment_ = nullptr;
1826
1827         /*! \brief Non-owning handle to MD log file.
1828          *
1829          * \todo Context should own output facilities for client.
1830          * \todo Improve log file handle management.
1831          * \internal
1832          * Code managing the FILE* relies on the ability to set it to
1833          * nullptr to check whether the filehandle is valid.
1834          */
1835         t_fileio* logFileHandle_ = nullptr;
1836
1837         /*!
1838          * \brief Builder for simulation stop signal handler.
1839          */
1840         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1841 };
1842
1843 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules) :
1844     mdModules_(std::move(mdModules))
1845 {
1846 }
1847
1848 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1849
1850 Mdrunner::BuilderImplementation &
1851 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1852                                                       const real             forceWarningThreshold,
1853                                                       const StartingBehavior startingBehavior)
1854 {
1855     mdrunOptions_          = options;
1856     forceWarningThreshold_ = forceWarningThreshold;
1857     startingBehavior_      = startingBehavior;
1858     return *this;
1859 }
1860
1861 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1862 {
1863     domdecOptions_ = options;
1864 }
1865
1866 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1867 {
1868     nstlist_ = nstlist;
1869 }
1870
1871 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1872 {
1873     replicaExchangeParameters_ = params;
1874 }
1875
1876 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1877 {
1878     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1879 }
1880
1881 void Mdrunner::BuilderImplementation::addCommunicationRecord(t_commrec *cr)
1882 {
1883     communicationRecord_ = cr;
1884 }
1885
1886 Mdrunner Mdrunner::BuilderImplementation::build()
1887 {
1888     auto newRunner = Mdrunner(std::move(mdModules_));
1889
1890     newRunner.mdrunOptions          = mdrunOptions_;
1891     newRunner.pforce                = forceWarningThreshold_;
1892     newRunner.startingBehavior      = startingBehavior_;
1893     newRunner.domdecOptions         = domdecOptions_;
1894
1895     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1896     newRunner.hw_opt          = hardwareOptions_;
1897
1898     // No invariant to check. This parameter exists to optionally override other behavior.
1899     newRunner.nstlist_cmdline = nstlist_;
1900
1901     newRunner.replExParams    = replicaExchangeParameters_;
1902
1903     newRunner.filenames = filenames_;
1904
1905     GMX_ASSERT(communicationRecord_, "Bug found. It should not be possible to call build() without a valid communicationRecord_.");
1906     newRunner.cr = communicationRecord_;
1907
1908     if (multisim_)
1909     {
1910         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1911         newRunner.ms = *multisim_;
1912     }
1913     else
1914     {
1915         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1916     }
1917
1918     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1919     // \todo Update sanity checking when output environment has clearly specified invariants.
1920     // Initialization and default values for oenv are not well specified in the current version.
1921     if (outputEnvironment_)
1922     {
1923         newRunner.oenv  = outputEnvironment_;
1924     }
1925     else
1926     {
1927         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1928     }
1929
1930     newRunner.logFileHandle = logFileHandle_;
1931
1932     if (nbpu_opt_)
1933     {
1934         newRunner.nbpu_opt    = nbpu_opt_;
1935     }
1936     else
1937     {
1938         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1939     }
1940
1941     if (pme_opt_ && pme_fft_opt_)
1942     {
1943         newRunner.pme_opt     = pme_opt_;
1944         newRunner.pme_fft_opt = pme_fft_opt_;
1945     }
1946     else
1947     {
1948         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1949     }
1950
1951     if (bonded_opt_)
1952     {
1953         newRunner.bonded_opt = bonded_opt_;
1954     }
1955     else
1956     {
1957         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1958     }
1959
1960     if (update_opt_)
1961     {
1962         newRunner.update_opt = update_opt_;
1963     }
1964     else
1965     {
1966         GMX_THROW(gmx::APIError("MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
1967     }
1968
1969
1970     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1971
1972     if (stopHandlerBuilder_)
1973     {
1974         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1975     }
1976     else
1977     {
1978         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1979     }
1980
1981     return newRunner;
1982 }
1983
1984 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1985 {
1986     nbpu_opt_ = nbpu_opt;
1987 }
1988
1989 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1990                                              const char* pme_fft_opt)
1991 {
1992     pme_opt_     = pme_opt;
1993     pme_fft_opt_ = pme_fft_opt;
1994 }
1995
1996 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1997 {
1998     bonded_opt_ = bonded_opt;
1999 }
2000
2001 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
2002 {
2003     update_opt_ = update_opt;
2004 }
2005
2006 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2007 {
2008     hardwareOptions_ = hardwareOptions;
2009 }
2010
2011 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
2012 {
2013     filenames_ = filenames;
2014 }
2015
2016 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2017 {
2018     outputEnvironment_ = outputEnvironment;
2019 }
2020
2021 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
2022 {
2023     logFileHandle_ = logFileHandle;
2024 }
2025
2026 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2027 {
2028     stopHandlerBuilder_ = std::move(builder);
2029 }
2030
2031 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules) :
2032     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules))}
2033 {
2034 }
2035
2036 MdrunnerBuilder::~MdrunnerBuilder() = default;
2037
2038 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
2039                                                       real                   forceWarningThreshold,
2040                                                       const StartingBehavior startingBehavior)
2041 {
2042     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
2043     return *this;
2044 }
2045
2046 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
2047 {
2048     impl_->addDomdec(options);
2049     return *this;
2050 }
2051
2052 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
2053 {
2054     impl_->addVerletList(nstlist);
2055     return *this;
2056 }
2057
2058 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
2059 {
2060     impl_->addReplicaExchange(params);
2061     return *this;
2062 }
2063
2064 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
2065 {
2066     impl_->addMultiSim(multisim);
2067     return *this;
2068 }
2069
2070 MdrunnerBuilder &MdrunnerBuilder::addCommunicationRecord(t_commrec *cr)
2071 {
2072     impl_->addCommunicationRecord(cr);
2073     return *this;
2074 }
2075
2076 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2077 {
2078     impl_->addNonBonded(nbpu_opt);
2079     return *this;
2080 }
2081
2082 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
2083                                                     const char* pme_fft_opt)
2084 {
2085     // The builder method may become more general in the future, but in this version,
2086     // parameters for PME electrostatics are both required and the only parameters
2087     // available.
2088     if (pme_opt && pme_fft_opt)
2089     {
2090         impl_->addPME(pme_opt, pme_fft_opt);
2091     }
2092     else
2093     {
2094         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2095     }
2096     return *this;
2097 }
2098
2099 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2100 {
2101     impl_->addBondedTaskAssignment(bonded_opt);
2102     return *this;
2103 }
2104
2105 MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2106 {
2107     impl_->addUpdateTaskAssignment(update_opt);
2108     return *this;
2109 }
2110
2111 Mdrunner MdrunnerBuilder::build()
2112 {
2113     return impl_->build();
2114 }
2115
2116 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2117 {
2118     impl_->addHardwareOptions(hardwareOptions);
2119     return *this;
2120 }
2121
2122 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2123 {
2124     impl_->addFilenames(filenames);
2125     return *this;
2126 }
2127
2128 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2129 {
2130     impl_->addOutputEnvironment(outputEnvironment);
2131     return *this;
2132 }
2133
2134 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2135 {
2136     impl_->addLogFile(logFileHandle);
2137     return *this;
2138 }
2139
2140 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2141 {
2142     impl_->addStopHandlerBuilder(std::move(builder));
2143     return *this;
2144 }
2145
2146 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2147
2148 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2149
2150 } // namespace gmx