src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/builder.h"
  61 #include "gromacs/domdec/domdec.h"
  62 #include "gromacs/domdec/domdec_struct.h"
  63 #include "gromacs/domdec/gpuhaloexchange.h"
  64 #include "gromacs/domdec/localatomsetmanager.h"
  65 #include "gromacs/domdec/partition.h"
  66 #include "gromacs/ewald/ewald_utils.h"
  67 #include "gromacs/ewald/pme_gpu_program.h"
  68 #include "gromacs/ewald/pme_only.h"
  69 #include "gromacs/ewald/pme_pp_comm_gpu.h"
  70 #include "gromacs/fileio/checkpoint.h"
  71 #include "gromacs/fileio/gmxfio.h"
  72 #include "gromacs/fileio/oenv.h"
  73 #include "gromacs/fileio/tpxio.h"
  74 #include "gromacs/gmxlib/network.h"
  75 #include "gromacs/gmxlib/nrnb.h"
  76 #include "gromacs/gpu_utils/device_stream_manager.h"
  77 #include "gromacs/hardware/cpuinfo.h"
  78 #include "gromacs/hardware/detecthardware.h"
  79 #include "gromacs/hardware/device_management.h"
  80 #include "gromacs/hardware/printhardware.h"
  81 #include "gromacs/imd/imd.h"
  82 #include "gromacs/listed_forces/disre.h"
  83 #include "gromacs/listed_forces/gpubonded.h"
  84 #include "gromacs/listed_forces/listed_forces.h"
  85 #include "gromacs/listed_forces/orires.h"
  86 #include "gromacs/math/functions.h"
  87 #include "gromacs/math/utilities.h"
  88 #include "gromacs/math/vec.h"
  89 #include "gromacs/mdlib/boxdeformation.h"
  90 #include "gromacs/mdlib/broadcaststructs.h"
  91 #include "gromacs/mdlib/calc_verletbuf.h"
  92 #include "gromacs/mdlib/dispersioncorrection.h"
  93 #include "gromacs/mdlib/enerdata_utils.h"
  94 #include "gromacs/mdlib/force.h"
  95 #include "gromacs/mdlib/forcerec.h"
  96 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  97 #include "gromacs/mdlib/gpuforcereduction.h"
  98 #include "gromacs/mdlib/makeconstraints.h"
  99 #include "gromacs/mdlib/md_support.h"
 100 #include "gromacs/mdlib/mdatoms.h"
 101 #include "gromacs/mdlib/sighandler.h"
 102 #include "gromacs/mdlib/stophandler.h"
 103 #include "gromacs/mdlib/tgroup.h"
 104 #include "gromacs/mdlib/updategroups.h"
 105 #include "gromacs/mdlib/vsite.h"
 106 #include "gromacs/mdrun/mdmodules.h"
 107 #include "gromacs/mdrun/simulationcontext.h"
 108 #include "gromacs/mdrun/simulationinput.h"
 109 #include "gromacs/mdrun/simulationinputhandle.h"
 110 #include "gromacs/mdrunutility/handlerestart.h"
 111 #include "gromacs/mdrunutility/logging.h"
 112 #include "gromacs/mdrunutility/multisim.h"
 113 #include "gromacs/mdrunutility/printtime.h"
 114 #include "gromacs/mdrunutility/threadaffinity.h"
 115 #include "gromacs/mdtypes/checkpointdata.h"
 116 #include "gromacs/mdtypes/commrec.h"
 117 #include "gromacs/mdtypes/enerdata.h"
 118 #include "gromacs/mdtypes/fcdata.h"
 119 #include "gromacs/mdtypes/forcerec.h"
 120 #include "gromacs/mdtypes/group.h"
 121 #include "gromacs/mdtypes/inputrec.h"
 122 #include "gromacs/mdtypes/interaction_const.h"
 123 #include "gromacs/mdtypes/md_enums.h"
 124 #include "gromacs/mdtypes/mdatom.h"
 125 #include "gromacs/mdtypes/mdrunoptions.h"
 126 #include "gromacs/mdtypes/observableshistory.h"
 127 #include "gromacs/mdtypes/simulation_workload.h"
 128 #include "gromacs/mdtypes/state.h"
 129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 130 #include "gromacs/modularsimulator/modularsimulator.h"
 131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 132 #include "gromacs/nbnxm/nbnxm.h"
 133 #include "gromacs/nbnxm/pairlist_tuning.h"
 134 #include "gromacs/pbcutil/pbc.h"
 135 #include "gromacs/pulling/output.h"
 136 #include "gromacs/pulling/pull.h"
 137 #include "gromacs/pulling/pull_rotation.h"
 138 #include "gromacs/restraint/manager.h"
 139 #include "gromacs/restraint/restraintmdmodule.h"
 140 #include "gromacs/restraint/restraintpotential.h"
 141 #include "gromacs/swap/swapcoords.h"
 142 #include "gromacs/taskassignment/decidegpuusage.h"
 143 #include "gromacs/taskassignment/decidesimulationworkload.h"
 144 #include "gromacs/taskassignment/resourcedivision.h"
 145 #include "gromacs/taskassignment/taskassignment.h"
 146 #include "gromacs/taskassignment/usergpuids.h"
 147 #include "gromacs/timing/gpu_timing.h"
 148 #include "gromacs/timing/wallcycle.h"
 149 #include "gromacs/timing/wallcyclereporting.h"
 150 #include "gromacs/topology/mtop_util.h"
 151 #include "gromacs/trajectory/trajectoryframe.h"
 152 #include "gromacs/utility/basenetwork.h"
 153 #include "gromacs/utility/cstringutil.h"
 154 #include "gromacs/utility/exceptions.h"
 155 #include "gromacs/utility/fatalerror.h"
 156 #include "gromacs/utility/filestream.h"
 157 #include "gromacs/utility/gmxassert.h"
 158 #include "gromacs/utility/gmxmpi.h"
 159 #include "gromacs/utility/keyvaluetree.h"
 160 #include "gromacs/utility/logger.h"
 161 #include "gromacs/utility/loggerbuilder.h"
 162 #include "gromacs/utility/mdmodulenotification.h"
 163 #include "gromacs/utility/physicalnodecommunicator.h"
 164 #include "gromacs/utility/pleasecite.h"
 165 #include "gromacs/utility/programcontext.h"
 166 #include "gromacs/utility/smalloc.h"
 167 #include "gromacs/utility/stringutil.h"
 168
 169 #include "isimulator.h"
 170 #include "membedholder.h"
 171 #include "replicaexchange.h"
 172 #include "simulatorbuilder.h"
 173
 174 namespace gmx
 175 {
 176
 177
 178 /*! \brief Manage any development feature flag variables encountered
 179  *
 180  * The use of dev features indicated by environment variables is
 181  * logged in order to ensure that runs with such features enabled can
 182  * be identified from their log and standard output. Any cross
 183  * dependencies are also checked, and if unsatisfied, a fatal error
 184  * issued.
 185  *
 186  * Note that some development features overrides are applied already here:
 187  * the GPU communication flags are set to false in non-tMPI and non-CUDA builds.
 188  *
 189  * \param[in]  mdlog                Logger object.
 190  * \param[in]  useGpuForNonbonded   True if the nonbonded task is offloaded in this run.
 191  * \param[in]  pmeRunMode           The PME run mode for this run
 192  * \returns                         The object populated with development feature flags.
 193  */
 194 static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog,
 195                                                          const bool           useGpuForNonbonded,
 196                                                          const PmeRunMode     pmeRunMode)
 197 {
 198     DevelopmentFeatureFlags devFlags;
 199
 200     // Some builds of GCC 5 give false positive warnings that these
 201     // getenv results are ignored when clearly they are used.
 202 #pragma GCC diagnostic push
 203 #pragma GCC diagnostic ignored "-Wunused-result"
 204
 205     devFlags.enableGpuBufferOps =
 206             GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 207     devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr;
 208     devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
 209     devFlags.enableGpuPmePPComm =
 210             GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
 211
 212 #pragma GCC diagnostic pop
 213
 214     if (devFlags.enableGpuBufferOps)
 215     {
 216         GMX_LOG(mdlog.warning)
 217                 .asParagraph()
 218                 .appendTextFormatted(
 219                         "This run uses the 'GPU buffer ops' feature, enabled by the "
 220                         "GMX_USE_GPU_BUFFER_OPS environment variable.");
 221     }
 222
 223     if (devFlags.forceGpuUpdateDefault)
 224     {
 225         GMX_LOG(mdlog.warning)
 226                 .asParagraph()
 227                 .appendTextFormatted(
 228                         "This run will default to '-update gpu' as requested by the "
 229                         "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. GPU update with domain "
 230                         "decomposition lacks substantial testing and should be used with caution.");
 231     }
 232
 233     if (devFlags.enableGpuHaloExchange)
 234     {
 235         if (useGpuForNonbonded)
 236         {
 237             if (!devFlags.enableGpuBufferOps)
 238             {
 239                 GMX_LOG(mdlog.warning)
 240                         .asParagraph()
 241                         .appendTextFormatted(
 242                                 "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS "
 243                                 "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
 244                 devFlags.enableGpuBufferOps = true;
 245             }
 246             GMX_LOG(mdlog.warning)
 247                     .asParagraph()
 248                     .appendTextFormatted(
 249                             "This run has requested the 'GPU halo exchange' feature, enabled by "
 250                             "the "
 251                             "GMX_GPU_DD_COMMS environment variable.");
 252         }
 253         else
 254         {
 255             GMX_LOG(mdlog.warning)
 256                     .asParagraph()
 257                     .appendTextFormatted(
 258                             "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
 259                             "halo exchange' feature will not be enabled as nonbonded interactions "
 260                             "are not offloaded.");
 261             devFlags.enableGpuHaloExchange = false;
 262         }
 263     }
 264
 265     if (devFlags.enableGpuPmePPComm)
 266     {
 267         if (pmeRunMode == PmeRunMode::GPU)
 268         {
 269             if (!devFlags.enableGpuBufferOps)
 270             {
 271                 GMX_LOG(mdlog.warning)
 272                         .asParagraph()
 273                         .appendTextFormatted(
 274                                 "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS "
 275                                 "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
 276                 devFlags.enableGpuBufferOps = true;
 277             }
 278             GMX_LOG(mdlog.warning)
 279                     .asParagraph()
 280                     .appendTextFormatted(
 281                             "This run uses the 'GPU PME-PP communications' feature, enabled "
 282                             "by the GMX_GPU_PME_PP_COMMS environment variable.");
 283         }
 284         else
 285         {
 286             std::string clarification;
 287             if (pmeRunMode == PmeRunMode::Mixed)
 288             {
 289                 clarification =
 290                         "PME FFT and gather are not offloaded to the GPU (PME is running in mixed "
 291                         "mode).";
 292             }
 293             else
 294             {
 295                 clarification = "PME is not offloaded to the GPU.";
 296             }
 297             GMX_LOG(mdlog.warning)
 298                     .asParagraph()
 299                     .appendText(
 300                             "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
 301                             "'GPU PME-PP communications' feature was not enabled as "
 302                             + clarification);
 303             devFlags.enableGpuPmePPComm = false;
 304         }
 305     }
 306
 307     return devFlags;
 308 }
 309
 310 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 311  *
 312  * Used to ensure that the master thread does not modify mdrunner during copy
 313  * on the spawned threads. */
 314 static void threadMpiMdrunnerAccessBarrier()
 315 {
 316 #if GMX_THREAD_MPI
 317     MPI_Barrier(MPI_COMM_WORLD);
 318 #endif
 319 }
 320
 321 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 322 {
 323     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 324
 325     // All runners in the same process share a restraint manager resource because it is
 326     // part of the interface to the client code, which is associated only with the
 327     // original thread. Handles to the same resources can be obtained by copy.
 328     {
 329         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 330     }
 331
 332     // Copy members of master runner.
 333     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 334     // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375
 335     newRunner.hw_opt    = hw_opt;
 336     newRunner.filenames = filenames;
 337
 338     newRunner.oenv            = oenv;
 339     newRunner.mdrunOptions    = mdrunOptions;
 340     newRunner.domdecOptions   = domdecOptions;
 341     newRunner.nbpu_opt        = nbpu_opt;
 342     newRunner.pme_opt         = pme_opt;
 343     newRunner.pme_fft_opt     = pme_fft_opt;
 344     newRunner.bonded_opt      = bonded_opt;
 345     newRunner.update_opt      = update_opt;
 346     newRunner.nstlist_cmdline = nstlist_cmdline;
 347     newRunner.replExParams    = replExParams;
 348     newRunner.pforce          = pforce;
 349     // Give the spawned thread the newly created valid communicator
 350     // for the simulation.
 351     newRunner.libraryWorldCommunicator = MPI_COMM_WORLD;
 352     newRunner.simulationCommunicator   = MPI_COMM_WORLD;
 353     newRunner.ms                       = ms;
 354     newRunner.startingBehavior         = startingBehavior;
 355     newRunner.stopHandlerBuilder_      = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 356     newRunner.inputHolder_             = inputHolder_;
 357
 358     threadMpiMdrunnerAccessBarrier();
 359
 360     return newRunner;
 361 }
 362
 363 /*! \brief The callback used for running on spawned threads.
 364  *
 365  * Obtains the pointer to the master mdrunner object from the one
 366  * argument permitted to the thread-launch API call, copies it to make
 367  * a new runner for this thread, reinitializes necessary data, and
 368  * proceeds to the simulation. */
 369 static void mdrunner_start_fn(const void* arg)
 370 {
 371     try
 372     {
 373         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
 374         /* copy the arg list to make sure that it's thread-local. This
 375            doesn't copy pointed-to items, of course; fnm, cr and fplog
 376            are reset in the call below, all others should be const. */
 377         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 378         mdrunner.mdrunner();
 379     }
 380     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 381 }
 382
 383
 384 void Mdrunner::spawnThreads(int numThreadsToLaunch)
 385 {
 386 #if GMX_THREAD_MPI
 387     /* now spawn new threads that start mdrunner_start_fn(), while
 388        the main thread returns. Thread affinity is handled later. */
 389     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn,
 390                      static_cast<const void*>(this))
 391         != TMPI_SUCCESS)
 392     {
 393         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 394     }
 395
 396     // Give the master thread the newly created valid communicator for
 397     // the simulation.
 398     libraryWorldCommunicator = MPI_COMM_WORLD;
 399     simulationCommunicator   = MPI_COMM_WORLD;
 400     threadMpiMdrunnerAccessBarrier();
 401 #else
 402     GMX_UNUSED_VALUE(numThreadsToLaunch);
 403     GMX_UNUSED_VALUE(mdrunner_start_fn);
 404 #endif
 405 }
 406
 407 } // namespace gmx
 408
 409 /*! \brief Initialize variables for Verlet scheme simulation */
 410 static void prepare_verlet_scheme(FILE*               fplog,
 411                                   t_commrec*          cr,
 412                                   t_inputrec*         ir,
 413                                   int                 nstlist_cmdline,
 414                                   const gmx_mtop_t*   mtop,
 415                                   const matrix        box,
 416                                   bool                makeGpuPairList,
 417                                   const gmx::CpuInfo& cpuinfo)
 418 {
 419     // We checked the cut-offs in grompp, but double-check here.
 420     // We have PME+LJcutoff kernels for rcoulomb>rvdw.
 421     if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
 422     {
 423         GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
 424                            "With Verlet lists and PME we should have rcoulomb>=rvdw");
 425     }
 426     else
 427     {
 428         GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw,
 429                            "With Verlet lists and no PME rcoulomb and rvdw should be identical");
 430     }
 431     /* For NVE simulations, we will retain the initial list buffer */
 432     if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
 433     {
 434         /* Update the Verlet buffer size for the current run setup */
 435
 436         /* Here we assume SIMD-enabled kernels are being used. But as currently
 437          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 438          * and 4x2 gives a larger buffer than 4x4, this is ok.
 439          */
 440         ListSetupType listType =
 441                 (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 442         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 443
 444         const real rlist_new =
 445                 calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 446
 447         if (rlist_new != ir->rlist)
 448         {
 449             if (fplog != nullptr)
 450             {
 451                 fprintf(fplog,
 452                         "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 453                         ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j);
 454             }
 455             ir->rlist = rlist_new;
 456         }
 457     }
 458
 459     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 460     {
 461         gmx_fatal(FARGS, "Can not set nstlist without %s",
 462                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 463     }
 464
 465     if (EI_DYNAMICS(ir->eI))
 466     {
 467         /* Set or try nstlist values */
 468         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 469     }
 470 }
 471
 472 /*! \brief Override the nslist value in inputrec
 473  *
 474  * with value passed on the command line (if any)
 475  */
 476 static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir)
 477 {
 478     assert(ir);
 479
 480     /* override with anything else than the default -2 */
 481     if (nsteps_cmdline > -2)
 482     {
 483         char sbuf_steps[STEPSTRSIZE];
 484         char sbuf_msg[STRLEN];
 485
 486         ir->nsteps = nsteps_cmdline;
 487         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 488         {
 489             sprintf(sbuf_msg,
 490                     "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 491                     gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t));
 492         }
 493         else
 494         {
 495             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 496                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 497         }
 498
 499         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 500     }
 501     else if (nsteps_cmdline < -2)
 502     {
 503         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline);
 504     }
 505     /* Do nothing if nsteps_cmdline == -2 */
 506 }
 507
 508 namespace gmx
 509 {
 510
 511 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 512  *
 513  * If not, and if a warning may be issued, logs a warning about
 514  * falling back to CPU code. With thread-MPI, only the first
 515  * call to this function should have \c issueWarning true. */
 516 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning)
 517 {
 518     bool        gpuIsUseful = true;
 519     std::string warning;
 520
 521     if (ir.opts.ngener - ir.nwall > 1)
 522     {
 523         /* The GPU code does not support more than one energy group.
 524          * If the user requested GPUs explicitly, a fatal error is given later.
 525          */
 526         gpuIsUseful = false;
 527         warning =
 528                 "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 529                 "For better performance, run on the GPU without energy groups and then do "
 530                 "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 531     }
 532
 533     if (EI_TPI(ir.eI))
 534     {
 535         gpuIsUseful = false;
 536         warning     = "TPI is not implemented for GPUs.";
 537     }
 538
 539     if (!gpuIsUseful && issueWarning)
 540     {
 541         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 542     }
 543
 544     return gpuIsUseful;
 545 }
 546
 547 //! Initializes the logger for mdrun.
 548 static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank)
 549 {
 550     gmx::LoggerBuilder builder;
 551     if (fplog != nullptr)
 552     {
 553         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 554     }
 555     if (isSimulationMasterRank)
 556     {
 557         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
 558     }
 559     return builder.build();
 560 }
 561
 562 //! Make a TaskTarget from an mdrun argument string.
 563 static TaskTarget findTaskTarget(const char* optionString)
 564 {
 565     TaskTarget returnValue = TaskTarget::Auto;
 566
 567     if (strncmp(optionString, "auto", 3) == 0)
 568     {
 569         returnValue = TaskTarget::Auto;
 570     }
 571     else if (strncmp(optionString, "cpu", 3) == 0)
 572     {
 573         returnValue = TaskTarget::Cpu;
 574     }
 575     else if (strncmp(optionString, "gpu", 3) == 0)
 576     {
 577         returnValue = TaskTarget::Gpu;
 578     }
 579     else
 580     {
 581         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 582     }
 583
 584     return returnValue;
 585 }
 586
 587 //! Finish run, aggregate data to print performance info.
 588 static void finish_run(FILE*                     fplog,
 589                        const gmx::MDLogger&      mdlog,
 590                        const t_commrec*          cr,
 591                        const t_inputrec*         inputrec,
 592                        t_nrnb                    nrnb[],
 593                        gmx_wallcycle_t           wcycle,
 594                        gmx_walltime_accounting_t walltime_accounting,
 595                        nonbonded_verlet_t*       nbv,
 596                        const gmx_pme_t*          pme,
 597                        gmx_bool                  bWriteStat)
 598 {
 599     double delta_t = 0;
 600     double nbfs = 0, mflop = 0;
 601     double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads,
 602             elapsed_time_over_all_threads_over_all_ranks;
 603     /* Control whether it is valid to print a report. Only the
 604        simulation master may print, but it should not do so if the run
 605        terminated e.g. before a scheduled reset step. This is
 606        complicated by the fact that PME ranks are unaware of the
 607        reason why they were sent a pmerecvqxFINISH. To avoid
 608        communication deadlocks, we always do the communication for the
 609        report, even if we've decided not to write the report, because
 610        how long it takes to finish the run is not important when we've
 611        decided not to report on the simulation performance.
 612
 613        Further, we only report performance for dynamical integrators,
 614        because those are the only ones for which we plan to
 615        consider doing any optimizations. */
 616     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 617
 618     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 619     {
 620         GMX_LOG(mdlog.warning)
 621                 .asParagraph()
 622                 .appendText("Simulation ended prematurely, no performance report will be written.");
 623         printReport = false;
 624     }
 625
 626     t_nrnb*                 nrnb_tot;
 627     std::unique_ptr<t_nrnb> nrnbTotalStorage;
 628     if (cr->nnodes > 1)
 629     {
 630         nrnbTotalStorage = std::make_unique<t_nrnb>();
 631         nrnb_tot         = nrnbTotalStorage.get();
 632 #if GMX_MPI
 633         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
 634 #endif
 635     }
 636     else
 637     {
 638         nrnb_tot = nrnb;
 639     }
 640
 641     elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting);
 642     elapsed_time_over_all_threads =
 643             walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 644     if (cr->nnodes > 1)
 645     {
 646 #if GMX_MPI
 647         /* reduce elapsed_time over all MPI ranks in the current simulation */
 648         MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM,
 649                       cr->mpi_comm_mysim);
 650         elapsed_time_over_all_ranks /= cr->nnodes;
 651         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 652          * current simulation. */
 653         MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks,
 654                       1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
 655 #endif
 656     }
 657     else
 658     {
 659         elapsed_time_over_all_ranks                  = elapsed_time;
 660         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 661     }
 662
 663     if (printReport)
 664     {
 665         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 666     }
 667
 668     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 669     {
 670         print_dd_statistics(cr, inputrec, fplog);
 671     }
 672
 673     /* TODO Move the responsibility for any scaling by thread counts
 674      * to the code that handled the thread region, so that there's a
 675      * mechanism to keep cycle counting working during the transition
 676      * to task parallelism. */
 677     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 678     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 679     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP),
 680                                    nthreads_pp, nthreads_pme);
 681     auto cycle_sum(wallcycle_sum(cr, wcycle));
 682
 683     if (printReport)
 684     {
 685         auto nbnxn_gpu_timings =
 686                 (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 687         gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
 688
 689         if (pme_gpu_task_enabled(pme))
 690         {
 691             pme_gpu_get_timings(pme, &pme_gpu_timings);
 692         }
 693         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 694                         elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings,
 695                         &pme_gpu_timings);
 696
 697         if (EI_DYNAMICS(inputrec->eI))
 698         {
 699             delta_t = inputrec->delta_t;
 700         }
 701
 702         if (fplog)
 703         {
 704             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
 705                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 706                        delta_t, nbfs, mflop);
 707         }
 708         if (bWriteStat)
 709         {
 710             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
 711                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 712                        delta_t, nbfs, mflop);
 713         }
 714     }
 715 }
 716
 717 int Mdrunner::mdrunner()
 718 {
 719     matrix                    box;
 720     t_forcerec*               fr               = nullptr;
 721     real                      ewaldcoeff_q     = 0;
 722     real                      ewaldcoeff_lj    = 0;
 723     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 724     gmx_wallcycle_t           wcycle;
 725     gmx_walltime_accounting_t walltime_accounting = nullptr;
 726     MembedHolder              membedHolder(filenames.size(), filenames.data());
 727     gmx_hw_info_t*            hwinfo = nullptr;
 728
 729     /* CAUTION: threads may be started later on in this function, so
 730        cr doesn't reflect the final parallel state right now */
 731     gmx_mtop_t mtop;
 732
 733     /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
 734     const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
 735     const bool doRerun             = mdrunOptions.rerun;
 736
 737     // Handle task-assignment related user options.
 738     EmulateGpuNonbonded emulateGpuNonbonded =
 739             (getenv("GMX_EMULATE_GPU") != nullptr ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 740
 741     std::vector<int> userGpuTaskAssignment;
 742     try
 743     {
 744         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 745     }
 746     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 747     auto nonbondedTarget = findTaskTarget(nbpu_opt);
 748     auto pmeTarget       = findTaskTarget(pme_opt);
 749     auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
 750     auto bondedTarget    = findTaskTarget(bonded_opt);
 751     auto updateTarget    = findTaskTarget(update_opt);
 752
 753     FILE* fplog = nullptr;
 754     // If we are appending, we don't write log output because we need
 755     // to check that the old log file matches what the checkpoint file
 756     // expects. Otherwise, we should start to write log output now if
 757     // there is a file ready for it.
 758     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 759     {
 760         fplog = gmx_fio_getfp(logFileHandle);
 761     }
 762     const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator);
 763     gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
 764     gmx::MDLogger    mdlog(logOwner.logger());
 765
 766     // TODO The thread-MPI master rank makes a working
 767     // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
 768     // after the threads have been launched. This works because no use
 769     // is made of that communicator until after the execution paths
 770     // have rejoined. But it is likely that we can improve the way
 771     // this is expressed, e.g. by expressly running detection only the
 772     // master rank for thread-MPI, rather than relying on the mutex
 773     // and reference count.
 774     PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
 775     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 776
 777     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
 778
 779     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
 780     const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
 781
 782     // Print citation requests after all software/hardware printing
 783     pleaseCiteGromacs(fplog);
 784
 785     // Note: legacy program logic relies on checking whether these pointers are assigned.
 786     // Objects may or may not be allocated later.
 787     std::unique_ptr<t_inputrec> inputrec;
 788     std::unique_ptr<t_state>    globalState;
 789
 790     auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
 791
 792     if (isSimulationMasterRank)
 793     {
 794         // Allocate objects to be initialized by later function calls.
 795         /* Only the master rank has the global state */
 796         globalState = std::make_unique<t_state>();
 797         inputrec    = std::make_unique<t_inputrec>();
 798
 799         /* Read (nearly) all data required for the simulation
 800          * and keep the partly serialized tpr contents to send to other ranks later
 801          */
 802         applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(),
 803                                    globalState.get(), inputrec.get(), &mtop);
 804     }
 805
 806     /* Check and update the hardware options for internal consistency */
 807     checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
 808                                   inputrec.get());
 809
 810     if (GMX_THREAD_MPI && isSimulationMasterRank)
 811     {
 812         bool useGpuForNonbonded = false;
 813         bool useGpuForPme       = false;
 814         try
 815         {
 816             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 817
 818             // If the user specified the number of ranks, then we must
 819             // respect that, but in default mode, we need to allow for
 820             // the number of GPUs to choose the number of ranks.
 821             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 822             useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
 823                     nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 824                     canUseGpuForNonbonded,
 825                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 826                     hw_opt.nthreads_tmpi);
 827             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
 828                     useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
 829                     *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 830         }
 831         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 832
 833         /* Determine how many thread-MPI ranks to start.
 834          *
 835          * TODO Over-writing the user-supplied value here does
 836          * prevent any possible subsequent checks from working
 837          * correctly. */
 838         hw_opt.nthreads_tmpi =
 839                 get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
 840                                  inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
 841
 842         // Now start the threads for thread MPI.
 843         spawnThreads(hw_opt.nthreads_tmpi);
 844         // The spawned threads enter mdrunner() and execution of
 845         // master and spawned threads joins at the end of this block.
 846         physicalNodeComm =
 847                 PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
 848     }
 849
 850     GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
 851                        "Must have valid communicator unless running a multi-simulation");
 852     CommrecHandle crHandle = init_commrec(simulationCommunicator);
 853     t_commrec*    cr       = crHandle.get();
 854     GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
 855
 856     if (PAR(cr))
 857     {
 858         /* now broadcast everything to the non-master nodes/threads: */
 859         if (!isSimulationMasterRank)
 860         {
 861             // Until now, only the master rank has a non-null pointer.
 862             // On non-master ranks, allocate the object that will receive data in the following call.
 863             inputrec = std::make_unique<t_inputrec>();
 864         }
 865         init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop,
 866                       partialDeserializedTpr.get());
 867     }
 868     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
 869     partialDeserializedTpr.reset(nullptr);
 870
 871     // Now the number of ranks is known to all ranks, and each knows
 872     // the inputrec read by the master rank. The ranks can now all run
 873     // the task-deciding functions and will agree on the result
 874     // without needing to communicate.
 875     const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
 876
 877     // Note that these variables describe only their own node.
 878     //
 879     // Note that when bonded interactions run on a GPU they always run
 880     // alongside a nonbonded task, so do not influence task assignment
 881     // even though they affect the force calculation workload.
 882     bool useGpuForNonbonded = false;
 883     bool useGpuForPme       = false;
 884     bool useGpuForBonded    = false;
 885     bool useGpuForUpdate    = false;
 886     bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
 887     try
 888     {
 889         // It's possible that there are different numbers of GPUs on
 890         // different nodes, which is the user's responsibility to
 891         // handle. If unsuitable, we will notice that during task
 892         // assignment.
 893         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 894         useGpuForNonbonded         = decideWhetherToUseGpusForNonbonded(
 895                 nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
 896                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
 897         useGpuForPme = decideWhetherToUseGpusForPme(
 898                 useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
 899                 cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
 900         useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 901                                                           bondedTarget, *inputrec, mtop,
 902                                                           domdecOptions.numPmeRanks, gpusWereDetected);
 903     }
 904     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 905
 906     const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
 907
 908     // Initialize development feature flags that enabled by environment variable
 909     // and report those features that are enabled.
 910     const DevelopmentFeatureFlags devFlags =
 911             manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
 912
 913     const bool useModularSimulator =
 914             checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams,
 915                                      nullptr, doEssentialDynamics, membedHolder.doMembed());
 916
 917     // Build restraints.
 918     // TODO: hide restraint implementation details from Mdrunner.
 919     // There is nothing unique about restraints at this point as far as the
 920     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 921     // factory functions from the SimulationContext on which to call mdModules_->add().
 922     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 923     for (auto&& restraint : restraintManager_->getRestraints())
 924     {
 925         auto module = RestraintMDModule::create(restraint, restraint->sites());
 926         mdModules_->add(std::move(module));
 927     }
 928
 929     // TODO: Error handling
 930     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 931     // now that the MdModules know their options, they know which callbacks to sign up to
 932     mdModules_->subscribeToSimulationSetupNotifications();
 933     const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_;
 934
 935     if (inputrec->internalParameters != nullptr)
 936     {
 937         mdModulesNotifier.notify(*inputrec->internalParameters);
 938     }
 939
 940     if (fplog != nullptr)
 941     {
 942         pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE);
 943         fprintf(fplog, "\n");
 944     }
 945
 946     if (SIMMASTER(cr))
 947     {
 948         /* In rerun, set velocities to zero if present */
 949         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 950         {
 951             // rerun does not use velocities
 952             GMX_LOG(mdlog.info)
 953                     .asParagraph()
 954                     .appendText(
 955                             "Rerun trajectory contains velocities. Rerun does only evaluate "
 956                             "potential energy and forces. The velocities will be ignored.");
 957             for (int i = 0; i < globalState->natoms; i++)
 958             {
 959                 clear_rvec(globalState->v[i]);
 960             }
 961             globalState->flags &= ~(1 << estV);
 962         }
 963
 964         /* now make sure the state is initialized and propagated */
 965         set_state_entries(globalState.get(), inputrec.get(), useModularSimulator);
 966     }
 967
 968     /* NM and TPI parallelize over force/energy calculations, not atoms,
 969      * so we need to initialize and broadcast the global state.
 970      */
 971     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 972     {
 973         if (!MASTER(cr))
 974         {
 975             globalState = std::make_unique<t_state>();
 976         }
 977         broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr),
 978                                       globalState.get());
 979     }
 980
 981     /* A parallel command line option consistency check that we can
 982        only do after any threads have started. */
 983     if (!PAR(cr)
 984         && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1
 985             || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0))
 986     {
 987         gmx_fatal(FARGS,
 988                   "The -dd or -npme option request a parallel simulation, "
 989 #if !GMX_MPI
 990                   "but %s was compiled without threads or MPI enabled",
 991                   output_env_get_program_display_name(oenv));
 992 #elif GMX_THREAD_MPI
 993                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 994 #else
 995                   "but %s was not started through mpirun/mpiexec or only one rank was requested "
 996                   "through mpirun/mpiexec",
 997                   output_env_get_program_display_name(oenv));
 998 #endif
 999     }
1000
1001     if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
1002     {
1003         gmx_fatal(FARGS,
1004                   "The .mdp file specified an energy mininization or normal mode algorithm, and "
1005                   "these are not compatible with mdrun -rerun");
1006     }
1007
1008     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
1009     {
1010         if (domdecOptions.numPmeRanks > 0)
1011         {
1012             gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr),
1013                                  "PME-only ranks are requested, but the system does not use PME "
1014                                  "for electrostatics or LJ");
1015         }
1016
1017         domdecOptions.numPmeRanks = 0;
1018     }
1019
1020     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
1021     {
1022         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
1023          * improve performance with many threads per GPU, since our OpenMP
1024          * scaling is bad, but it's difficult to automate the setup.
1025          */
1026         domdecOptions.numPmeRanks = 0;
1027     }
1028     if (useGpuForPme)
1029     {
1030         if (domdecOptions.numPmeRanks < 0)
1031         {
1032             domdecOptions.numPmeRanks = 0;
1033             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
1034         }
1035         else
1036         {
1037             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1,
1038                                "PME GPU decomposition is not supported");
1039         }
1040     }
1041
1042     /* NMR restraints must be initialized before load_checkpoint,
1043      * since with time averaging the history is added to t_state.
1044      * For proper consistency check we therefore need to extend
1045      * t_state here.
1046      * So the PME-only nodes (if present) will also initialize
1047      * the distance restraints.
1048      */
1049
1050     /* This needs to be called before read_checkpoint to extend the state */
1051     t_disresdata* disresdata;
1052     snew(disresdata, 1);
1053     init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun,
1054                 MASTER(cr) ? DDRole::Master : DDRole::Agent,
1055                 PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata,
1056                 globalState.get(), replExParams.exchangeInterval > 0);
1057
1058     t_oriresdata* oriresdata;
1059     snew(oriresdata, 1);
1060     init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata);
1061
1062     auto deform = prepareBoxDeformation(
1063             globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent,
1064             PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec);
1065
1066 #if GMX_FAHCORE
1067     /* We have to remember the generation's first step before reading checkpoint.
1068        This way, we can report to the F@H core both the generation's first step
1069        and the restored first step, thus making it able to distinguish between
1070        an interruption/resume and start of the n-th generation simulation.
1071        Having this information, the F@H core can correctly calculate and report
1072        the progress.
1073      */
1074     int gen_first_step = 0;
1075     if (MASTER(cr))
1076     {
1077         gen_first_step = inputrec->init_step;
1078     }
1079 #endif
1080
1081     ObservablesHistory observablesHistory = {};
1082
1083     auto modularSimulatorCheckpointData = std::make_unique<ReadCheckpointDataHolder>();
1084     if (startingBehavior != StartingBehavior::NewSimulation)
1085     {
1086         /* Check if checkpoint file exists before doing continuation.
1087          * This way we can use identical input options for the first and subsequent runs...
1088          */
1089         if (mdrunOptions.numStepsCommandline > -2)
1090         {
1091             /* Temporarily set the number of steps to unlimited to avoid
1092              * triggering the nsteps check in load_checkpoint().
1093              * This hack will go away soon when the -nsteps option is removed.
1094              */
1095             inputrec->nsteps = -1;
1096         }
1097
1098         // Finish applying initial simulation state information from external sources on all ranks.
1099         // Reconcile checkpoint file data with Mdrunner state established up to this point.
1100         applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells,
1101                         inputrec.get(), globalState.get(), &observablesHistory,
1102                         mdrunOptions.reproducible, mdModules_->notifier(),
1103                         modularSimulatorCheckpointData.get(), useModularSimulator);
1104         // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program
1105         //  invariants
1106         //  on all code paths.
1107         // Write checkpoint or provide hook to update SimulationInput.
1108         // If there was a checkpoint file, SimulationInput contains more information
1109         // than if there wasn't. At this point, we have synchronized the in-memory
1110         // state with the filesystem state only for restarted simulations. We should
1111         // be calling applyLocalState unconditionally and expect that the completeness
1112         // of SimulationInput is not dependent on its creation method.
1113
1114         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
1115         {
1116             // Now we can start normal logging to the truncated log file.
1117             fplog = gmx_fio_getfp(logFileHandle);
1118             prepareLogAppending(fplog);
1119             logOwner = buildLogger(fplog, MASTER(cr));
1120             mdlog    = logOwner.logger();
1121         }
1122     }
1123
1124 #if GMX_FAHCORE
1125     if (MASTER(cr))
1126     {
1127         fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step);
1128     }
1129 #endif
1130
1131     if (mdrunOptions.numStepsCommandline > -2)
1132     {
1133         GMX_LOG(mdlog.info)
1134                 .asParagraph()
1135                 .appendText(
1136                         "The -nsteps functionality is deprecated, and may be removed in a future "
1137                         "version. "
1138                         "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp "
1139                         "file field.");
1140     }
1141     /* override nsteps with value set on the commandline */
1142     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get());
1143
1144     if (isSimulationMasterRank)
1145     {
1146         copy_mat(globalState->box, box);
1147     }
1148
1149     if (PAR(cr))
1150     {
1151         gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator);
1152     }
1153
1154     if (inputrec->cutoff_scheme != ecutsVERLET)
1155     {
1156         gmx_fatal(FARGS,
1157                   "This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
1158                   "Verlet scheme, or use an earlier version of GROMACS if necessary.");
1159     }
1160     /* Update rlist and nstlist. */
1161     /* Note: prepare_verlet_scheme is calling increaseNstlist(...), which (while attempting to
1162      * increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is
1163      * run before any DD scheme is set up, this check is never executed. See #3334 for more details.
1164      */
1165     prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
1166                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
1167                           *hwinfo->cpuInfo);
1168
1169     // This builder is necessary while we have multi-part construction
1170     // of DD. Before DD is constructed, we use the existence of
1171     // the builder object to indicate that further construction of DD
1172     // is needed.
1173     std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
1174     if (useDomainDecomposition)
1175     {
1176         ddBuilder = std::make_unique<DomainDecompositionBuilder>(
1177                 mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box,
1178                 positionsFromStatePointer(globalState.get()));
1179     }
1180     else
1181     {
1182         /* PME, if used, is done on all nodes with 1D decomposition */
1183         cr->nnodes     = cr->sizeOfDefaultCommunicator;
1184         cr->sim_nodeid = cr->rankInDefaultCommunicator;
1185         cr->nodeid     = cr->rankInDefaultCommunicator;
1186         cr->npmenodes  = 0;
1187         cr->duty       = (DUTY_PP | DUTY_PME);
1188
1189         if (inputrec->pbcType == PbcType::Screw)
1190         {
1191             gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
1192         }
1193     }
1194
1195     // Produce the task assignment for this rank - done after DD is constructed
1196     GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
1197             gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
1198             nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
1199             useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
1200             // TODO cr->duty & DUTY_PME should imply that a PME
1201             // algorithm is active, but currently does not.
1202             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
1203
1204     // Get the device handles for the modules, nullptr when no task is assigned.
1205     int                deviceId   = -1;
1206     DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
1207
1208     // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
1209     bool useTiming = true;
1210
1211     if (GMX_GPU_CUDA)
1212     {
1213         /* WARNING: CUDA timings are incorrect with multiple streams.
1214          *          This is the main reason why they are disabled by default.
1215          */
1216         // TODO: Consider turning on by default when we can detect nr of streams.
1217         useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
1218     }
1219     else if (GMX_GPU_OPENCL)
1220     {
1221         useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
1222     }
1223
1224     // TODO Currently this is always built, yet DD partition code
1225     // checks if it is built before using it. Probably it should
1226     // become an MDModule that is made only when another module
1227     // requires it (e.g. pull, CompEl, density fitting), so that we
1228     // don't update the local atom sets unilaterally every step.
1229     LocalAtomSetManager atomSets;
1230     if (ddBuilder)
1231     {
1232         // TODO Pass the GPU streams to ddBuilder to use in buffer
1233         // transfers (e.g. halo exchange)
1234         cr->dd = ddBuilder->build(&atomSets);
1235         // The builder's job is done, so destruct it
1236         ddBuilder.reset(nullptr);
1237         // Note that local state still does not exist yet.
1238     }
1239
1240     // The GPU update is decided here because we need to know whether the constraints or
1241     // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
1242     // defined). This is only known after DD is initialized, hence decision on using GPU
1243     // update is done so late.
1244     try
1245     {
1246         const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
1247
1248         useGpuForUpdate = decideWhetherToUseGpuForUpdate(
1249                 useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0,
1250                 useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
1251                 doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
1252                 replExParams.exchangeInterval > 0, doRerun, devFlags, mdlog);
1253     }
1254     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1255
1256     const bool printHostName = (cr->nnodes > 1);
1257     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
1258
1259     const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr);
1260     if (disableNonbondedCalculation)
1261     {
1262         /* turn off non-bonded calculations */
1263         GMX_LOG(mdlog.warning)
1264                 .asParagraph()
1265                 .appendText(
1266                         "Found environment variable GMX_NO_NONBONDED.\n"
1267                         "Disabling nonbonded calculations.");
1268     }
1269
1270     MdrunScheduleWorkload runScheduleWork;
1271
1272     bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(
1273             devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator,
1274             doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI));
1275
1276     // Also populates the simulation constant workload description.
1277     runScheduleWork.simulationWork = createSimulationWorkload(
1278             *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode,
1279             useGpuForBonded, useGpuForUpdate, useGpuDirectHalo);
1280
1281     std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
1282
1283     if (deviceInfo != nullptr)
1284     {
1285         if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
1286         {
1287             dd_setup_dlb_resource_sharing(cr, deviceId);
1288         }
1289         deviceStreamManager = std::make_unique<DeviceStreamManager>(
1290                 *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
1291     }
1292
1293     // If the user chose a task assignment, give them some hints
1294     // where appropriate.
1295     if (!userGpuTaskAssignment.empty())
1296     {
1297         gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
1298     }
1299
1300     if (PAR(cr))
1301     {
1302         /* After possible communicator splitting in make_dd_communicators.
1303          * we can set up the intra/inter node communication.
1304          */
1305         gmx_setup_nodecomm(fplog, cr);
1306     }
1307
1308 #if GMX_MPI
1309     if (isMultiSim(ms))
1310     {
1311         GMX_LOG(mdlog.warning)
1312                 .asParagraph()
1313                 .appendTextFormatted(
1314                         "This is simulation %d out of %d running as a composite GROMACS\n"
1315                         "multi-simulation job. Setup for this simulation:\n",
1316                         ms->simulationIndex_, ms->numSimulations_);
1317     }
1318     GMX_LOG(mdlog.warning)
1319             .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
1320 #    if GMX_THREAD_MPI
1321                                  cr->nnodes == 1 ? "thread" : "threads"
1322 #    else
1323                                  cr->nnodes == 1 ? "process" : "processes"
1324 #    endif
1325             );
1326     fflush(stderr);
1327 #endif
1328
1329     // If mdrun -pin auto honors any affinity setting that already
1330     // exists. If so, it is nice to provide feedback about whether
1331     // that existing affinity setting was from OpenMP or something
1332     // else, so we run this code both before and after we initialize
1333     // the OpenMP support.
1334     gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1335     /* Check and update the number of OpenMP threads requested */
1336     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1337                                             pmeRunMode, mtop, *inputrec);
1338
1339     gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
1340                           hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
1341
1342     // Enable FP exception detection, but not in
1343     // Release mode and not for compilers with known buggy FP
1344     // exception support (clang with any optimization) or suspected
1345     // buggy FP exception support (gcc 7.* with optimization).
1346 #if !defined NDEBUG                                                                         \
1347         && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1348              && defined __OPTIMIZE__)
1349     const bool bEnableFPE = true;
1350 #else
1351     const bool bEnableFPE = false;
1352 #endif
1353     // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1354     if (bEnableFPE)
1355     {
1356         gmx_feenableexcept();
1357     }
1358
1359     /* Now that we know the setup is consistent, check for efficiency */
1360     check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
1361                                        mdrunOptions.ntompOptionIsSet, cr, mdlog);
1362
1363     /* getting number of PP/PME threads on this MPI / tMPI rank.
1364        PME: env variable should be read only on one node to make sure it is
1365        identical everywhere;
1366      */
1367     const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
1368                                                                   : gmx_omp_nthreads_get(emntPME);
1369     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
1370                                   physicalNodeComm, mdlog);
1371
1372     // Enable Peer access between GPUs where available
1373     // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
1374     // any of the GPU communication features are active.
1375     if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
1376         && (runScheduleWork.simulationWork.useGpuHaloExchange
1377             || runScheduleWork.simulationWork.useGpuPmePpCommunication))
1378     {
1379         setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
1380     }
1381
1382     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1383     {
1384         /* Before setting affinity, check whether the affinity has changed
1385          * - which indicates that probably the OpenMP library has changed it
1386          * since we first checked).
1387          */
1388         gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1389
1390         int numThreadsOnThisNode, intraNodeThreadOffset;
1391         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1392                                  &intraNodeThreadOffset);
1393
1394         /* Set the CPU affinity */
1395         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
1396                                 numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
1397     }
1398
1399     if (mdrunOptions.timingOptions.resetStep > -1)
1400     {
1401         GMX_LOG(mdlog.info)
1402                 .asParagraph()
1403                 .appendText(
1404                         "The -resetstep functionality is deprecated, and may be removed in a "
1405                         "future version.");
1406     }
1407     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1408
1409     if (PAR(cr))
1410     {
1411         /* Master synchronizes its value of reset_counters with all nodes
1412          * including PME only nodes */
1413         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1414         gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
1415         wcycle_set_reset_counters(wcycle, reset_counters);
1416     }
1417
1418     // Membrane embedding must be initialized before we call init_forcerec()
1419     membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(),
1420                                   globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
1421
1422     const bool               thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1423     std::unique_ptr<MDAtoms> mdAtoms;
1424     std::unique_ptr<VirtualSitesHandler> vsite;
1425     std::unique_ptr<GpuBonded>           gpuBonded;
1426
1427     t_nrnb nrnb;
1428     if (thisRankHasDuty(cr, DUTY_PP))
1429     {
1430         mdModulesNotifier.notify(*cr);
1431         mdModulesNotifier.notify(&atomSets);
1432         mdModulesNotifier.notify(inputrec->pbcType);
1433         mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
1434         /* Initiate forcerecord */
1435         fr                 = new t_forcerec;
1436         fr->forceProviders = mdModules_->initForceProviders();
1437         init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box,
1438                       opt2fn("-table", filenames.size(), filenames.data()),
1439                       opt2fn("-tablep", filenames.size(), filenames.data()),
1440                       opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
1441         // Dirty hack, for fixing disres and orires should be made mdmodules
1442         fr->fcdata->disres = disresdata;
1443         fr->fcdata->orires = oriresdata;
1444
1445         // Save a handle to device stream manager to use elsewhere in the code
1446         // TODO: Forcerec is not a correct place to store it.
1447         fr->deviceStreamManager = deviceStreamManager.get();
1448
1449         if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME))
1450         {
1451             GMX_RELEASE_ASSERT(
1452                     deviceStreamManager != nullptr,
1453                     "GPU device stream manager should be valid in order to use PME-PP direct "
1454                     "communications.");
1455             GMX_RELEASE_ASSERT(
1456                     deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer),
1457                     "GPU PP-PME stream should be valid in order to use GPU PME-PP direct "
1458                     "communications.");
1459             fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
1460                     cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(),
1461                     deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
1462         }
1463
1464         fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
1465                                         runScheduleWork.simulationWork.useGpuNonbonded,
1466                                         deviceStreamManager.get(), &mtop, box, wcycle);
1467         // TODO: Move the logic below to a GPU bonded builder
1468         if (runScheduleWork.simulationWork.useGpuBonded)
1469         {
1470             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1471                                "GPU device stream manager should be valid in order to use GPU "
1472                                "version of bonded forces.");
1473             gpuBonded = std::make_unique<GpuBonded>(
1474                     mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(),
1475                     deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle);
1476             fr->gpuBonded = gpuBonded.get();
1477         }
1478
1479         /* Initialize the mdAtoms structure.
1480          * mdAtoms is not filled with atom data,
1481          * as this can not be done now with domain decomposition.
1482          */
1483         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1484         if (globalState && thisRankHasPmeGpuTask)
1485         {
1486             // The pinning of coordinates in the global state object works, because we only use
1487             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1488             // points to the global state object without DD.
1489             // FIXME: MD and EM separately set up the local state - this should happen in the same
1490             // function, which should also perform the pinning.
1491             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1492         }
1493
1494         /* Initialize the virtual site communication */
1495         vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType);
1496
1497         calc_shifts(box, fr->shift_vec);
1498
1499         /* With periodic molecules the charge groups should be whole at start up
1500          * and the virtual sites should not be far from their proper positions.
1501          */
1502         if (!inputrec->bContinuation && MASTER(cr)
1503             && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols))
1504         {
1505             /* Make molecules whole at start of run */
1506             if (fr->pbcType != PbcType::No)
1507             {
1508                 do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array());
1509             }
1510             if (vsite)
1511             {
1512                 /* Correct initial vsite positions are required
1513                  * for the initial distribution in the domain decomposition
1514                  * and for the initial shell prediction.
1515                  */
1516                 constructVirtualSitesGlobal(mtop, globalState->x);
1517             }
1518         }
1519
1520         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1521         {
1522             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1523             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1524         }
1525     }
1526     else
1527     {
1528         /* This is a PME only node */
1529
1530         GMX_ASSERT(globalState == nullptr,
1531                    "We don't need the state on a PME only rank and expect it to be unitialized");
1532
1533         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1534         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1535     }
1536
1537     gmx_pme_t* sepPmeData = nullptr;
1538     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1539     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr),
1540                "Double-checking that only PME-only ranks have no forcerec");
1541     gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData;
1542
1543     // TODO should live in ewald module once its testing is improved
1544     //
1545     // Later, this program could contain kernels that might be later
1546     // re-used as auto-tuning progresses, or subsequent simulations
1547     // are invoked.
1548     PmeGpuProgramStorage pmeGpuProgram;
1549     if (thisRankHasPmeGpuTask)
1550     {
1551         GMX_RELEASE_ASSERT(
1552                 (deviceStreamManager != nullptr),
1553                 "GPU device stream manager should be initialized in order to use GPU for PME.");
1554         GMX_RELEASE_ASSERT((deviceInfo != nullptr),
1555                            "GPU device should be initialized in order to use GPU for PME.");
1556         pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context());
1557     }
1558
1559     /* Initiate PME if necessary,
1560      * either on all nodes or on dedicated PME nodes only. */
1561     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1562     {
1563         if (mdAtoms && mdAtoms->mdatoms())
1564         {
1565             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1566             if (EVDW_PME(inputrec->vdwtype))
1567             {
1568                 nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
1569             }
1570         }
1571         if (cr->npmenodes > 0)
1572         {
1573             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1574             gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim);
1575             gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim);
1576         }
1577
1578         if (thisRankHasDuty(cr, DUTY_PME))
1579         {
1580             try
1581             {
1582                 // TODO: This should be in the builder.
1583                 GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme
1584                                            || (deviceStreamManager != nullptr),
1585                                    "Device stream manager should be valid in order to use GPU "
1586                                    "version of PME.");
1587                 GMX_RELEASE_ASSERT(
1588                         !runScheduleWork.simulationWork.useGpuPme
1589                                 || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
1590                         "GPU PME stream should be valid in order to use GPU version of PME.");
1591
1592                 const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
1593                                                              ? &deviceStreamManager->context()
1594                                                              : nullptr;
1595                 const DeviceStream* pmeStream =
1596                         runScheduleWork.simulationWork.useGpuPme
1597                                 ? &deviceStreamManager->stream(DeviceStreamType::Pme)
1598                                 : nullptr;
1599
1600                 pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(),
1601                                        nChargePerturbed != 0, nTypePerturbed != 0,
1602                                        mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj,
1603                                        gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr,
1604                                        deviceContext, pmeStream, pmeGpuProgram.get(), mdlog);
1605             }
1606             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1607         }
1608     }
1609
1610
1611     if (EI_DYNAMICS(inputrec->eI))
1612     {
1613         /* Turn on signal handling on all nodes */
1614         /*
1615          * (A user signal from the PME nodes (if any)
1616          * is communicated to the PP nodes.
1617          */
1618         signal_handler_install();
1619     }
1620
1621     pull_t* pull_work = nullptr;
1622     if (thisRankHasDuty(cr, DUTY_PP))
1623     {
1624         /* Assumes uniform use of the number of OpenMP threads */
1625         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1626
1627         if (inputrec->bPull)
1628         {
1629             /* Initialize pull code */
1630             pull_work = init_pull(fplog, inputrec->pull.get(), inputrec.get(), &mtop, cr, &atomSets,
1631                                   inputrec->fepvals->init_lambda);
1632             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1633             {
1634                 initPullHistory(pull_work, &observablesHistory);
1635             }
1636             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1637             {
1638                 init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior);
1639             }
1640         }
1641
1642         std::unique_ptr<EnforcedRotation> enforcedRotation;
1643         if (inputrec->bRot)
1644         {
1645             /* Initialize enforced rotation code */
1646             enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(),
1647                                         cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions,
1648                                         startingBehavior);
1649         }
1650
1651         t_swap* swap = nullptr;
1652         if (inputrec->eSwapCoords != eswapNO)
1653         {
1654             /* Initialize ion swapping code */
1655             swap = init_swapcoords(fplog, inputrec.get(),
1656                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1657                                    &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
1658                                    oenv, mdrunOptions, startingBehavior);
1659         }
1660
1661         /* Let makeConstraints know whether we have essential dynamics constraints. */
1662         auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr,
1663                                       ms, &nrnb, wcycle, fr->bMolPBC);
1664
1665         /* Energy terms and groups */
1666         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
1667                              inputrec->fepvals->n_lambda);
1668
1669         /* Kinetic energy data */
1670         gmx_ekindata_t ekind;
1671         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1672
1673         /* Set up interactive MD (IMD) */
1674         auto imdSession =
1675                 makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog,
1676                                MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
1677                                filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
1678
1679         if (DOMAINDECOMP(cr))
1680         {
1681             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1682             /* This call is not included in init_domain_decomposition mainly
1683              * because fr->cginfo_mb is set later.
1684              */
1685             dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(),
1686                             domdecOptions.checkBondedInteractions, fr->cginfo_mb);
1687         }
1688
1689         if (runScheduleWork.simulationWork.useGpuBufferOps)
1690         {
1691             fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
1692                     deviceStreamManager->context(),
1693                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal), wcycle);
1694             fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
1695                     deviceStreamManager->context(),
1696                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal), wcycle);
1697         }
1698
1699         std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
1700         if (gpusWereDetected
1701             && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
1702                 || runScheduleWork.simulationWork.useGpuBufferOps))
1703         {
1704             GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
1705                                                       ? GpuApiCallBehavior::Async
1706                                                       : GpuApiCallBehavior::Sync;
1707             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1708                                "GPU device stream manager should be initialized to use GPU.");
1709             stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
1710                     *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle);
1711             fr->stateGpu = stateGpu.get();
1712         }
1713
1714         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1715         SimulatorBuilder simulatorBuilder;
1716
1717         simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind));
1718         simulatorBuilder.add(std::move(membedHolder));
1719         simulatorBuilder.add(std::move(stopHandlerBuilder_));
1720         simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork));
1721
1722
1723         simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
1724         simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle));
1725         simulatorBuilder.add(ConstraintsParam(
1726                 constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1727                 vsite.get()));
1728         // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
1729         simulatorBuilder.add(LegacyInput(static_cast<int>(filenames.size()), filenames.data(),
1730                                          inputrec.get(), fr));
1731         simulatorBuilder.add(ReplicaExchangeParameters(replExParams));
1732         simulatorBuilder.add(InteractiveMD(imdSession.get()));
1733         simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier()));
1734         simulatorBuilder.add(CenterOfMassPulling(pull_work));
1735         // Todo move to an MDModule
1736         simulatorBuilder.add(IonSwapping(swap));
1737         simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get()));
1738         simulatorBuilder.add(BoxDeformationHandle(deform.get()));
1739         simulatorBuilder.add(std::move(modularSimulatorCheckpointData));
1740
1741         // build and run simulator object based on user-input
1742         auto simulator = simulatorBuilder.build(useModularSimulator);
1743         simulator->run();
1744
1745         if (fr->pmePpCommGpu)
1746         {
1747             // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
1748             fr->pmePpCommGpu.reset();
1749         }
1750
1751         if (inputrec->bPull)
1752         {
1753             finish_pull(pull_work);
1754         }
1755         finish_swapcoords(swap);
1756     }
1757     else
1758     {
1759         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1760         /* do PME only */
1761         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1762         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode,
1763                     deviceStreamManager.get());
1764     }
1765
1766     wallcycle_stop(wcycle, ewcRUN);
1767
1768     /* Finish up, write some stuff
1769      * if rerunMD, don't write last frame again
1770      */
1771     finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting,
1772                fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1773
1774     // clean up cycle counter
1775     wallcycle_destroy(wcycle);
1776
1777     deviceStreamManager.reset(nullptr);
1778     // Free PME data
1779     if (pmedata)
1780     {
1781         gmx_pme_destroy(pmedata);
1782         pmedata = nullptr;
1783     }
1784
1785     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1786     // before we destroy the GPU context(s)
1787     // Pinned buffers are associated with contexts in CUDA.
1788     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1789     mdAtoms.reset(nullptr);
1790     globalState.reset(nullptr);
1791     mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
1792     gpuBonded.reset(nullptr);
1793     /* Free pinned buffers in *fr */
1794     delete fr;
1795     fr = nullptr;
1796     // TODO convert to C++ so we can get rid of these frees
1797     sfree(disresdata);
1798     sfree(oriresdata);
1799
1800     if (!hwinfo->deviceInfoList.empty())
1801     {
1802         /* stop the GPU profiler (only CUDA) */
1803         stopGpuProfiler();
1804     }
1805
1806     /* With tMPI we need to wait for all ranks to finish deallocation before
1807      * destroying the CUDA context as some tMPI ranks may be sharing
1808      * GPU and context.
1809      *
1810      * This is not a concern in OpenCL where we use one context per rank.
1811      *
1812      * Note: it is safe to not call the barrier on the ranks which do not use GPU,
1813      * but it is easier and more futureproof to call it on the whole node.
1814      *
1815      * Note that this function needs to be called even if GPUs are not used
1816      * in this run because the PME ranks have no knowledge of whether GPUs
1817      * are used or not, but all ranks need to enter the barrier below.
1818      * \todo Remove this physical node barrier after making sure
1819      * that it's not needed anymore (with a shared GPU run).
1820      */
1821     if (GMX_THREAD_MPI)
1822     {
1823         physicalNodeComm.barrier();
1824     }
1825     releaseDevice(deviceInfo);
1826
1827     /* Does what it says */
1828     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1829     walltime_accounting_destroy(walltime_accounting);
1830
1831     // Ensure log file content is written
1832     if (logFileHandle)
1833     {
1834         gmx_fio_flush(logFileHandle);
1835     }
1836
1837     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1838      * exceptions were enabled before function was called. */
1839     if (bEnableFPE)
1840     {
1841         gmx_fedisableexcept();
1842     }
1843
1844     auto rc = static_cast<int>(gmx_get_stop_condition());
1845
1846 #if GMX_THREAD_MPI
1847     /* we need to join all threads. The sub-threads join when they
1848        exit this function, but the master thread needs to be told to
1849        wait for that. */
1850     if (MASTER(cr))
1851     {
1852         tMPI_Finalize();
1853     }
1854 #endif
1855     return rc;
1856 } // namespace gmx
1857
1858 Mdrunner::~Mdrunner()
1859 {
1860     // Clean up of the Manager.
1861     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1862     // but okay as long as threads synchronize some time before adding or accessing
1863     // a new set of restraints.
1864     if (restraintManager_)
1865     {
1866         restraintManager_->clear();
1867         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1868                    "restraints added during runner life time should be cleared at runner "
1869                    "destruction.");
1870     }
1871 };
1872
1873 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, const std::string& name)
1874 {
1875     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1876     // Not sure if this should be logged through the md logger or something else,
1877     // but it is helpful to have some sort of INFO level message sent somewhere.
1878     //    std::cout << "Registering restraint named " << name << std::endl;
1879
1880     // When multiple restraints are used, it may be wasteful to register them separately.
1881     // Maybe instead register an entire Restraint Manager as a force provider.
1882     restraintManager_->addToSpec(std::move(puller), name);
1883 }
1884
1885 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules) : mdModules_(std::move(mdModules)) {}
1886
1887 Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
1888
1889 Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept = default;
1890
1891 class Mdrunner::BuilderImplementation
1892 {
1893 public:
1894     BuilderImplementation() = delete;
1895     BuilderImplementation(std::unique_ptr<MDModules> mdModules, compat::not_null<SimulationContext*> context);
1896     ~BuilderImplementation();
1897
1898     BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options,
1899                                                 real                forceWarningThreshold,
1900                                                 StartingBehavior    startingBehavior);
1901
1902     void addDomdec(const DomdecOptions& options);
1903
1904     void addInput(SimulationInputHandle inputHolder);
1905
1906     void addVerletList(int nstlist);
1907
1908     void addReplicaExchange(const ReplicaExchangeParameters& params);
1909
1910     void addNonBonded(const char* nbpu_opt);
1911
1912     void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1913
1914     void addBondedTaskAssignment(const char* bonded_opt);
1915
1916     void addUpdateTaskAssignment(const char* update_opt);
1917
1918     void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions);
1919
1920     void addFilenames(ArrayRef<const t_filenm> filenames);
1921
1922     void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1923
1924     void addLogFile(t_fileio* logFileHandle);
1925
1926     void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1927
1928     Mdrunner build();
1929
1930 private:
1931     // Default parameters copied from runner.h
1932     // \todo Clarify source(s) of default parameters.
1933
1934     const char* nbpu_opt_    = nullptr;
1935     const char* pme_opt_     = nullptr;
1936     const char* pme_fft_opt_ = nullptr;
1937     const char* bonded_opt_  = nullptr;
1938     const char* update_opt_  = nullptr;
1939
1940     MdrunOptions mdrunOptions_;
1941
1942     DomdecOptions domdecOptions_;
1943
1944     ReplicaExchangeParameters replicaExchangeParameters_;
1945
1946     //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1947     int nstlist_ = 0;
1948
1949     //! World communicator, used for hardware detection and task assignment
1950     MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL;
1951
1952     //! Multisim communicator handle.
1953     gmx_multisim_t* multiSimulation_;
1954
1955     //! mdrun communicator
1956     MPI_Comm simulationCommunicator_ = MPI_COMM_NULL;
1957
1958     //! Print a warning if any force is larger than this (in kJ/mol nm).
1959     real forceWarningThreshold_ = -1;
1960
1961     //! Whether the simulation will start afresh, or restart with/without appending.
1962     StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1963
1964     //! The modules that comprise the functionality of mdrun.
1965     std::unique_ptr<MDModules> mdModules_;
1966
1967     //! \brief Parallelism information.
1968     gmx_hw_opt_t hardwareOptions_;
1969
1970     //! filename options for simulation.
1971     ArrayRef<const t_filenm> filenames_;
1972
1973     /*! \brief Handle to output environment.
1974      *
1975      * \todo gmx_output_env_t needs lifetime management.
1976      */
1977     gmx_output_env_t* outputEnvironment_ = nullptr;
1978
1979     /*! \brief Non-owning handle to MD log file.
1980      *
1981      * \todo Context should own output facilities for client.
1982      * \todo Improve log file handle management.
1983      * \internal
1984      * Code managing the FILE* relies on the ability to set it to
1985      * nullptr to check whether the filehandle is valid.
1986      */
1987     t_fileio* logFileHandle_ = nullptr;
1988
1989     /*!
1990      * \brief Builder for simulation stop signal handler.
1991      */
1992     std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1993
1994     /*!
1995      * \brief Sources for initial simulation state.
1996      *
1997      * See issue #3652 for near-term refinements to the SimulationInput interface.
1998      *
1999      * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs.
2000      */
2001     SimulationInputHandle inputHolder_;
2002 };
2003
2004 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
2005                                                        compat::not_null<SimulationContext*> context) :
2006     mdModules_(std::move(mdModules))
2007 {
2008     libraryWorldCommunicator_ = context->libraryWorldCommunicator_;
2009     simulationCommunicator_   = context->simulationCommunicator_;
2010     multiSimulation_          = context->multiSimulation_.get();
2011 }
2012
2013 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
2014
2015 Mdrunner::BuilderImplementation&
2016 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions&    options,
2017                                                       const real             forceWarningThreshold,
2018                                                       const StartingBehavior startingBehavior)
2019 {
2020     mdrunOptions_          = options;
2021     forceWarningThreshold_ = forceWarningThreshold;
2022     startingBehavior_      = startingBehavior;
2023     return *this;
2024 }
2025
2026 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options)
2027 {
2028     domdecOptions_ = options;
2029 }
2030
2031 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
2032 {
2033     nstlist_ = nstlist;
2034 }
2035
2036 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params)
2037 {
2038     replicaExchangeParameters_ = params;
2039 }
2040
2041 Mdrunner Mdrunner::BuilderImplementation::build()
2042 {
2043     auto newRunner = Mdrunner(std::move(mdModules_));
2044
2045     newRunner.mdrunOptions     = mdrunOptions_;
2046     newRunner.pforce           = forceWarningThreshold_;
2047     newRunner.startingBehavior = startingBehavior_;
2048     newRunner.domdecOptions    = domdecOptions_;
2049
2050     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
2051     newRunner.hw_opt = hardwareOptions_;
2052
2053     // No invariant to check. This parameter exists to optionally override other behavior.
2054     newRunner.nstlist_cmdline = nstlist_;
2055
2056     newRunner.replExParams = replicaExchangeParameters_;
2057
2058     newRunner.filenames = filenames_;
2059
2060     newRunner.libraryWorldCommunicator = libraryWorldCommunicator_;
2061
2062     newRunner.simulationCommunicator = simulationCommunicator_;
2063
2064     // nullptr is a valid value for the multisim handle
2065     newRunner.ms = multiSimulation_;
2066
2067     if (inputHolder_)
2068     {
2069         newRunner.inputHolder_ = std::move(inputHolder_);
2070     }
2071     else
2072     {
2073         GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build()."));
2074     }
2075
2076     // \todo Clarify ownership and lifetime management for gmx_output_env_t
2077     // \todo Update sanity checking when output environment has clearly specified invariants.
2078     // Initialization and default values for oenv are not well specified in the current version.
2079     if (outputEnvironment_)
2080     {
2081         newRunner.oenv = outputEnvironment_;
2082     }
2083     else
2084     {
2085         GMX_THROW(gmx::APIError(
2086                 "MdrunnerBuilder::addOutputEnvironment() is required before build()"));
2087     }
2088
2089     newRunner.logFileHandle = logFileHandle_;
2090
2091     if (nbpu_opt_)
2092     {
2093         newRunner.nbpu_opt = nbpu_opt_;
2094     }
2095     else
2096     {
2097         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
2098     }
2099
2100     if (pme_opt_ && pme_fft_opt_)
2101     {
2102         newRunner.pme_opt     = pme_opt_;
2103         newRunner.pme_fft_opt = pme_fft_opt_;
2104     }
2105     else
2106     {
2107         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
2108     }
2109
2110     if (bonded_opt_)
2111     {
2112         newRunner.bonded_opt = bonded_opt_;
2113     }
2114     else
2115     {
2116         GMX_THROW(gmx::APIError(
2117                 "MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
2118     }
2119
2120     if (update_opt_)
2121     {
2122         newRunner.update_opt = update_opt_;
2123     }
2124     else
2125     {
2126         GMX_THROW(gmx::APIError(
2127                 "MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
2128     }
2129
2130
2131     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
2132
2133     if (stopHandlerBuilder_)
2134     {
2135         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
2136     }
2137     else
2138     {
2139         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
2140     }
2141
2142     return newRunner;
2143 }
2144
2145 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
2146 {
2147     nbpu_opt_ = nbpu_opt;
2148 }
2149
2150 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt)
2151 {
2152     pme_opt_     = pme_opt;
2153     pme_fft_opt_ = pme_fft_opt;
2154 }
2155
2156 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
2157 {
2158     bonded_opt_ = bonded_opt;
2159 }
2160
2161 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
2162 {
2163     update_opt_ = update_opt;
2164 }
2165
2166 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2167 {
2168     hardwareOptions_ = hardwareOptions;
2169 }
2170
2171 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
2172 {
2173     filenames_ = filenames;
2174 }
2175
2176 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2177 {
2178     outputEnvironment_ = outputEnvironment;
2179 }
2180
2181 void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle)
2182 {
2183     logFileHandle_ = logFileHandle;
2184 }
2185
2186 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2187 {
2188     stopHandlerBuilder_ = std::move(builder);
2189 }
2190
2191 void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder)
2192 {
2193     inputHolder_ = std::move(inputHolder);
2194 }
2195
2196 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
2197                                  compat::not_null<SimulationContext*> context) :
2198     impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
2199 {
2200 }
2201
2202 MdrunnerBuilder::~MdrunnerBuilder() = default;
2203
2204 MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
2205                                                       real                   forceWarningThreshold,
2206                                                       const StartingBehavior startingBehavior)
2207 {
2208     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
2209     return *this;
2210 }
2211
2212 MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options)
2213 {
2214     impl_->addDomdec(options);
2215     return *this;
2216 }
2217
2218 MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist)
2219 {
2220     impl_->addVerletList(nstlist);
2221     return *this;
2222 }
2223
2224 MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params)
2225 {
2226     impl_->addReplicaExchange(params);
2227     return *this;
2228 }
2229
2230 MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2231 {
2232     impl_->addNonBonded(nbpu_opt);
2233     return *this;
2234 }
2235
2236 MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt)
2237 {
2238     // The builder method may become more general in the future, but in this version,
2239     // parameters for PME electrostatics are both required and the only parameters
2240     // available.
2241     if (pme_opt && pme_fft_opt)
2242     {
2243         impl_->addPME(pme_opt, pme_fft_opt);
2244     }
2245     else
2246     {
2247         GMX_THROW(
2248                 gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2249     }
2250     return *this;
2251 }
2252
2253 MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2254 {
2255     impl_->addBondedTaskAssignment(bonded_opt);
2256     return *this;
2257 }
2258
2259 MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2260 {
2261     impl_->addUpdateTaskAssignment(update_opt);
2262     return *this;
2263 }
2264
2265 Mdrunner MdrunnerBuilder::build()
2266 {
2267     return impl_->build();
2268 }
2269
2270 MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2271 {
2272     impl_->addHardwareOptions(hardwareOptions);
2273     return *this;
2274 }
2275
2276 MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2277 {
2278     impl_->addFilenames(filenames);
2279     return *this;
2280 }
2281
2282 MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2283 {
2284     impl_->addOutputEnvironment(outputEnvironment);
2285     return *this;
2286 }
2287
2288 MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle)
2289 {
2290     impl_->addLogFile(logFileHandle);
2291     return *this;
2292 }
2293
2294 MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2295 {
2296     impl_->addStopHandlerBuilder(std::move(builder));
2297     return *this;
2298 }
2299
2300 MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input)
2301 {
2302     impl_->addInput(std::move(input));
2303     return *this;
2304 }
2305
2306 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
2307
2308 MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
2309
2310 } // namespace gmx