src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/commandline/filenm.h"
  59 #include "gromacs/compat/make_unique.h"
  60 #include "gromacs/domdec/domdec.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/localatomsetmanager.h"
  63 #include "gromacs/ewald/ewald-utils.h"
  64 #include "gromacs/ewald/pme.h"
  65 #include "gromacs/ewald/pme-gpu-program.h"
  66 #include "gromacs/fileio/checkpoint.h"
  67 #include "gromacs/fileio/oenv.h"
  68 #include "gromacs/fileio/tpxio.h"
  69 #include "gromacs/gmxlib/network.h"
  70 #include "gromacs/gmxlib/nrnb.h"
  71 #include "gromacs/gpu_utils/clfftinitializer.h"
  72 #include "gromacs/gpu_utils/gpu_utils.h"
  73 #include "gromacs/hardware/cpuinfo.h"
  74 #include "gromacs/hardware/detecthardware.h"
  75 #include "gromacs/hardware/printhardware.h"
  76 #include "gromacs/listed-forces/disre.h"
  77 #include "gromacs/listed-forces/orires.h"
  78 #include "gromacs/math/functions.h"
  79 #include "gromacs/math/utilities.h"
  80 #include "gromacs/math/vec.h"
  81 #include "gromacs/mdlib/boxdeformation.h"
  82 #include "gromacs/mdlib/calc_verletbuf.h"
  83 #include "gromacs/mdlib/forcerec.h"
  84 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  85 #include "gromacs/mdlib/makeconstraints.h"
  86 #include "gromacs/mdlib/md_support.h"
  87 #include "gromacs/mdlib/mdatoms.h"
  88 #include "gromacs/mdlib/mdrun.h"
  89 #include "gromacs/mdlib/membed.h"
  90 #include "gromacs/mdlib/nb_verlet.h"
  91 #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
  92 #include "gromacs/mdlib/nbnxn_search.h"
  93 #include "gromacs/mdlib/nbnxn_tuning.h"
  94 #include "gromacs/mdlib/qmmm.h"
  95 #include "gromacs/mdlib/sighandler.h"
  96 #include "gromacs/mdlib/sim_util.h"
  97 #include "gromacs/mdlib/stophandler.h"
  98 #include "gromacs/mdrun/logging.h"
  99 #include "gromacs/mdrun/multisim.h"
 100 #include "gromacs/mdrun/simulationcontext.h"
 101 #include "gromacs/mdrunutility/mdmodules.h"
 102 #include "gromacs/mdrunutility/threadaffinity.h"
 103 #include "gromacs/mdtypes/commrec.h"
 104 #include "gromacs/mdtypes/fcdata.h"
 105 #include "gromacs/mdtypes/inputrec.h"
 106 #include "gromacs/mdtypes/md_enums.h"
 107 #include "gromacs/mdtypes/observableshistory.h"
 108 #include "gromacs/mdtypes/state.h"
 109 #include "gromacs/pbcutil/pbc.h"
 110 #include "gromacs/pulling/output.h"
 111 #include "gromacs/pulling/pull.h"
 112 #include "gromacs/pulling/pull_rotation.h"
 113 #include "gromacs/swap/swapcoords.h"
 114 #include "gromacs/taskassignment/decidegpuusage.h"
 115 #include "gromacs/taskassignment/resourcedivision.h"
 116 #include "gromacs/taskassignment/taskassignment.h"
 117 #include "gromacs/taskassignment/usergpuids.h"
 118 #include "gromacs/timing/wallcycle.h"
 119 #include "gromacs/topology/mtop_util.h"
 120 #include "gromacs/trajectory/trajectoryframe.h"
 121 #include "gromacs/utility/basenetwork.h"
 122 #include "gromacs/utility/cstringutil.h"
 123 #include "gromacs/utility/exceptions.h"
 124 #include "gromacs/utility/fatalerror.h"
 125 #include "gromacs/utility/filestream.h"
 126 #include "gromacs/utility/gmxassert.h"
 127 #include "gromacs/utility/gmxmpi.h"
 128 #include "gromacs/utility/logger.h"
 129 #include "gromacs/utility/loggerbuilder.h"
 130 #include "gromacs/utility/physicalnodecommunicator.h"
 131 #include "gromacs/utility/pleasecite.h"
 132 #include "gromacs/utility/programcontext.h"
 133 #include "gromacs/utility/smalloc.h"
 134 #include "gromacs/utility/stringutil.h"
 135
 136 #include "integrator.h"
 137 #include "replicaexchange.h"
 138
 139 #if GMX_FAHCORE
 140 #include "corewrap.h"
 141 #endif
 142
 143 namespace gmx
 144 {
 145
 146 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 147  *
 148  * Used to ensure that the master thread does not modify mdrunner during copy
 149  * on the spawned threads. */
 150 static void threadMpiMdrunnerAccessBarrier()
 151 {
 152 #if GMX_THREAD_MPI
 153     MPI_Barrier(MPI_COMM_WORLD);
 154 #endif
 155 }
 156
 157 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 158 {
 159     auto newRunner = Mdrunner();
 160
 161     // Copy original cr pointer before master thread can pass the thread barrier
 162     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
 163
 164     // Copy members of master runner.
 165     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 166     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 167     newRunner.hw_opt    = hw_opt;
 168     newRunner.filenames = filenames;
 169
 170     newRunner.oenv            = oenv;
 171     newRunner.mdrunOptions    = mdrunOptions;
 172     newRunner.domdecOptions   = domdecOptions;
 173     newRunner.nbpu_opt        = nbpu_opt;
 174     newRunner.pme_opt         = pme_opt;
 175     newRunner.pme_fft_opt     = pme_fft_opt;
 176     newRunner.nstlist_cmdline = nstlist_cmdline;
 177     newRunner.replExParams    = replExParams;
 178     newRunner.pforce          = pforce;
 179     newRunner.ms              = ms;
 180
 181     threadMpiMdrunnerAccessBarrier();
 182
 183     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "reinitializeOnSpawnedThread should only be called on spawned threads");
 184
 185     // Only the master rank writes to the log file
 186     newRunner.fplog = nullptr;
 187
 188     return newRunner;
 189 }
 190
 191 /*! \brief The callback used for running on spawned threads.
 192  *
 193  * Obtains the pointer to the master mdrunner object from the one
 194  * argument permitted to the thread-launch API call, copies it to make
 195  * a new runner for this thread, reinitializes necessary data, and
 196  * proceeds to the simulation. */
 197 static void mdrunner_start_fn(const void *arg)
 198 {
 199     try
 200     {
 201         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 202         /* copy the arg list to make sure that it's thread-local. This
 203            doesn't copy pointed-to items, of course; fnm, cr and fplog
 204            are reset in the call below, all others should be const. */
 205         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 206         mdrunner.mdrunner();
 207     }
 208     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 209 }
 210
 211
 212 /*! \brief Start thread-MPI threads.
 213  *
 214  * Called by mdrunner() to start a specific number of threads
 215  * (including the main thread) for thread-parallel runs. This in turn
 216  * calls mdrunner() for each thread. All options are the same as for
 217  * mdrunner(). */
 218 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 219 {
 220
 221     /* first check whether we even need to start tMPI */
 222     if (numThreadsToLaunch < 2)
 223     {
 224         return cr;
 225     }
 226
 227 #if GMX_THREAD_MPI
 228     /* now spawn new threads that start mdrunner_start_fn(), while
 229        the main thread returns, we set thread affinity later */
 230     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 231                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 232     {
 233         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 234     }
 235
 236     threadMpiMdrunnerAccessBarrier();
 237 #else
 238     GMX_UNUSED_VALUE(mdrunner_start_fn);
 239 #endif
 240
 241     return reinitialize_commrec_for_this_thread(cr);
 242 }
 243
 244 }  // namespace gmx
 245
 246 /*! \brief Initialize variables for Verlet scheme simulation */
 247 static void prepare_verlet_scheme(FILE                           *fplog,
 248                                   t_commrec                      *cr,
 249                                   t_inputrec                     *ir,
 250                                   int                             nstlist_cmdline,
 251                                   const gmx_mtop_t               *mtop,
 252                                   const matrix                    box,
 253                                   bool                            makeGpuPairList,
 254                                   const gmx::CpuInfo             &cpuinfo)
 255 {
 256     /* For NVE simulations, we will retain the initial list buffer */
 257     if (EI_DYNAMICS(ir->eI) &&
 258         ir->verletbuf_tol > 0 &&
 259         !(EI_MD(ir->eI) && ir->etc == etcNO))
 260     {
 261         /* Update the Verlet buffer size for the current run setup */
 262
 263         /* Here we assume SIMD-enabled kernels are being used. But as currently
 264          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 265          * and 4x2 gives a larger buffer than 4x4, this is ok.
 266          */
 267         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 268         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 269
 270         real               rlist_new;
 271         calc_verlet_buffer_size(mtop, det(box), ir, ir->nstlist, ir->nstlist - 1, -1, &listSetup, nullptr, &rlist_new);
 272
 273         if (rlist_new != ir->rlist)
 274         {
 275             if (fplog != nullptr)
 276             {
 277                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 278                         ir->rlist, rlist_new,
 279                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 280             }
 281             ir->rlist     = rlist_new;
 282         }
 283     }
 284
 285     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 286     {
 287         gmx_fatal(FARGS, "Can not set nstlist without %s",
 288                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 289     }
 290
 291     if (EI_DYNAMICS(ir->eI))
 292     {
 293         /* Set or try nstlist values */
 294         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 295     }
 296 }
 297
 298 /*! \brief Override the nslist value in inputrec
 299  *
 300  * with value passed on the command line (if any)
 301  */
 302 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 303                                     int64_t              nsteps_cmdline,
 304                                     t_inputrec          *ir)
 305 {
 306     assert(ir);
 307
 308     /* override with anything else than the default -2 */
 309     if (nsteps_cmdline > -2)
 310     {
 311         char sbuf_steps[STEPSTRSIZE];
 312         char sbuf_msg[STRLEN];
 313
 314         ir->nsteps = nsteps_cmdline;
 315         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 316         {
 317             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 318                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 319                     fabs(nsteps_cmdline*ir->delta_t));
 320         }
 321         else
 322         {
 323             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 324                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 325         }
 326
 327         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 328     }
 329     else if (nsteps_cmdline < -2)
 330     {
 331         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 332                   nsteps_cmdline);
 333     }
 334     /* Do nothing if nsteps_cmdline == -2 */
 335 }
 336
 337 namespace gmx
 338 {
 339
 340 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 341  *
 342  * If not, and if a warning may be issued, logs a warning about
 343  * falling back to CPU code. With thread-MPI, only the first
 344  * call to this function should have \c issueWarning true. */
 345 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 346                                                const t_inputrec *ir,
 347                                                bool              issueWarning)
 348 {
 349     if (ir->opts.ngener - ir->nwall > 1)
 350     {
 351         /* The GPU code does not support more than one energy group.
 352          * If the user requested GPUs explicitly, a fatal error is given later.
 353          */
 354         if (issueWarning)
 355         {
 356             GMX_LOG(mdlog.warning).asParagraph()
 357                 .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 358                             "For better performance, run on the GPU without energy groups and then do "
 359                             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.");
 360         }
 361         return false;
 362     }
 363     return true;
 364 }
 365
 366 //! Initializes the logger for mdrun.
 367 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
 368 {
 369     gmx::LoggerBuilder builder;
 370     if (fplog != nullptr)
 371     {
 372         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 373     }
 374     if (cr == nullptr || SIMMASTER(cr))
 375     {
 376         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 377                                 &gmx::TextOutputFile::standardError());
 378     }
 379     return builder.build();
 380 }
 381
 382 //! Make a TaskTarget from an mdrun argument string.
 383 static TaskTarget findTaskTarget(const char *optionString)
 384 {
 385     TaskTarget returnValue = TaskTarget::Auto;
 386
 387     if (strncmp(optionString, "auto", 3) == 0)
 388     {
 389         returnValue = TaskTarget::Auto;
 390     }
 391     else if (strncmp(optionString, "cpu", 3) == 0)
 392     {
 393         returnValue = TaskTarget::Cpu;
 394     }
 395     else if (strncmp(optionString, "gpu", 3) == 0)
 396     {
 397         returnValue = TaskTarget::Gpu;
 398     }
 399     else
 400     {
 401         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 402     }
 403
 404     return returnValue;
 405 }
 406
 407 int Mdrunner::mdrunner()
 408 {
 409     matrix                    box;
 410     t_nrnb                   *nrnb;
 411     t_forcerec               *fr               = nullptr;
 412     t_fcdata                 *fcd              = nullptr;
 413     real                      ewaldcoeff_q     = 0;
 414     real                      ewaldcoeff_lj    = 0;
 415     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 416     gmx_wallcycle_t           wcycle;
 417     gmx_walltime_accounting_t walltime_accounting = nullptr;
 418     int                       rc;
 419     int64_t                   reset_counters;
 420     int                       nthreads_pme = 1;
 421     gmx_membed_t *            membed       = nullptr;
 422     gmx_hw_info_t            *hwinfo       = nullptr;
 423
 424     /* CAUTION: threads may be started later on in this function, so
 425        cr doesn't reflect the final parallel state right now */
 426     std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules);
 427     t_inputrec                      inputrecInstance;
 428     t_inputrec                     *inputrec = &inputrecInstance;
 429     gmx_mtop_t                      mtop;
 430
 431     if (mdrunOptions.continuationOptions.appendFiles)
 432     {
 433         fplog = nullptr;
 434     }
 435
 436     bool doMembed = opt2bSet("-membed", filenames().size(), filenames().data());
 437     bool doRerun  = mdrunOptions.rerun;
 438
 439     // Handle task-assignment related user options.
 440     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 441                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 442     std::vector<int>    gpuIdsAvailable;
 443     try
 444     {
 445         gpuIdsAvailable = parseUserGpuIds(hw_opt.gpuIdsAvailable);
 446         // TODO We could put the GPU IDs into a std::map to find
 447         // duplicates, but for the small numbers of IDs involved, this
 448         // code is simple and fast.
 449         for (size_t i = 0; i != gpuIdsAvailable.size(); ++i)
 450         {
 451             for (size_t j = i+1; j != gpuIdsAvailable.size(); ++j)
 452             {
 453                 if (gpuIdsAvailable[i] == gpuIdsAvailable[j])
 454                 {
 455                     GMX_THROW(InvalidInputError(formatString("The string of available GPU device IDs '%s' may not contain duplicate device IDs", hw_opt.gpuIdsAvailable.c_str())));
 456                 }
 457             }
 458         }
 459     }
 460     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 461
 462     std::vector<int> userGpuTaskAssignment;
 463     try
 464     {
 465         userGpuTaskAssignment = parseUserGpuIds(hw_opt.userGpuTaskAssignment);
 466     }
 467     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 468     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 469     auto       pmeTarget       = findTaskTarget(pme_opt);
 470     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 471     PmeRunMode pmeRunMode      = PmeRunMode::None;
 472
 473     // Here we assume that SIMMASTER(cr) does not change even after the
 474     // threads are started.
 475     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
 476     gmx::MDLogger    mdlog(logOwner.logger());
 477
 478     // TODO The thread-MPI master rank makes a working
 479     // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
 480     // after the threads have been launched. This works because no use
 481     // is made of that communicator until after the execution paths
 482     // have rejoined. But it is likely that we can improve the way
 483     // this is expressed, e.g. by expressly running detection only the
 484     // master rank for thread-MPI, rather than relying on the mutex
 485     // and reference count.
 486     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 487     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 488
 489     gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo);
 490
 491     std::vector<int> gpuIdsToUse;
 492     auto             compatibleGpus = getCompatibleGpus(hwinfo->gpu_info);
 493     if (gpuIdsAvailable.empty())
 494     {
 495         gpuIdsToUse = compatibleGpus;
 496     }
 497     else
 498     {
 499         for (const auto &availableGpuId : gpuIdsAvailable)
 500         {
 501             bool availableGpuIsCompatible = false;
 502             for (const auto &compatibleGpuId : compatibleGpus)
 503             {
 504                 if (availableGpuId == compatibleGpuId)
 505                 {
 506                     availableGpuIsCompatible = true;
 507                     break;
 508                 }
 509             }
 510             if (!availableGpuIsCompatible)
 511             {
 512                 gmx_fatal(FARGS, "You limited the set of compatible GPUs to a set that included ID #%d, but that ID is not for a compatible GPU. List only compatible GPUs.", availableGpuId);
 513             }
 514             gpuIdsToUse.push_back(availableGpuId);
 515         }
 516     }
 517
 518     if (fplog != nullptr)
 519     {
 520         /* Print references after all software/hardware printing */
 521         please_cite(fplog, "Abraham2015");
 522         please_cite(fplog, "Pall2015");
 523         please_cite(fplog, "Pronk2013");
 524         please_cite(fplog, "Hess2008b");
 525         please_cite(fplog, "Spoel2005a");
 526         please_cite(fplog, "Lindahl2001a");
 527         please_cite(fplog, "Berendsen95a");
 528         writeSourceDoi(fplog);
 529     }
 530
 531     std::unique_ptr<t_state> globalState;
 532
 533     if (SIMMASTER(cr))
 534     {
 535         /* Only the master rank has the global state */
 536         globalState = compat::make_unique<t_state>();
 537
 538         /* Read (nearly) all data required for the simulation */
 539         read_tpx_state(ftp2fn(efTPR, filenames().size(), filenames().data()), inputrec, globalState.get(), &mtop);
 540
 541         /* In rerun, set velocities to zero if present */
 542         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 543         {
 544             // rerun does not use velocities
 545             GMX_LOG(mdlog.info).asParagraph().appendText(
 546                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 547                     "potential energy and forces. The velocities will be ignored.");
 548             for (int i = 0; i < globalState->natoms; i++)
 549             {
 550                 clear_rvec(globalState->v[i]);
 551             }
 552             globalState->flags &= ~(1 << estV);
 553         }
 554
 555         if (inputrec->cutoff_scheme != ecutsVERLET)
 556         {
 557             if (nstlist_cmdline > 0)
 558             {
 559                 gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
 560             }
 561
 562             if (!compatibleGpus.empty())
 563             {
 564                 GMX_LOG(mdlog.warning).asParagraph().appendText(
 565                         "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
 566                         "      To use a GPU, set the mdp option: cutoff-scheme = Verlet");
 567             }
 568         }
 569     }
 570
 571     /* Check and update the hardware options for internal consistency */
 572     check_and_update_hw_opt_1(mdlog, &hw_opt, cr, domdecOptions.numPmeRanks);
 573
 574     /* Early check for externally set process affinity. */
 575     gmx_check_thread_affinity_set(mdlog, cr,
 576                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 577
 578     if (GMX_THREAD_MPI && SIMMASTER(cr))
 579     {
 580         if (domdecOptions.numPmeRanks > 0 && hw_opt.nthreads_tmpi <= 0)
 581         {
 582             gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
 583         }
 584
 585         /* Since the master knows the cut-off scheme, update hw_opt for this.
 586          * This is done later for normal MPI and also once more with tMPI
 587          * for all tMPI ranks.
 588          */
 589         check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
 590
 591         bool useGpuForNonbonded = false;
 592         bool useGpuForPme       = false;
 593         try
 594         {
 595             // If the user specified the number of ranks, then we must
 596             // respect that, but in default mode, we need to allow for
 597             // the number of GPUs to choose the number of ranks.
 598
 599             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 600                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 601                     inputrec->cutoff_scheme == ecutsVERLET,
 602                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
 603                     hw_opt.nthreads_tmpi);
 604             auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
 605             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 606                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 607                     canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 608
 609         }
 610         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 611
 612         /* Determine how many thread-MPI ranks to start.
 613          *
 614          * TODO Over-writing the user-supplied value here does
 615          * prevent any possible subsequent checks from working
 616          * correctly. */
 617         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 618                                                 &hw_opt,
 619                                                 gpuIdsToUse,
 620                                                 useGpuForNonbonded,
 621                                                 useGpuForPme,
 622                                                 inputrec, &mtop,
 623                                                 mdlog,
 624                                                 doMembed);
 625
 626         // Now start the threads for thread MPI.
 627         cr = spawnThreads(hw_opt.nthreads_tmpi);
 628         /* The main thread continues here with a new cr. We don't deallocate
 629            the old cr because other threads may still be reading it. */
 630         // TODO Both master and spawned threads call dup_tfn and
 631         // reinitialize_commrec_for_this_thread. Find a way to express
 632         // this better.
 633         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 634     }
 635     // END OF CAUTION: cr and physicalNodeComm are now reliable
 636
 637     if (PAR(cr))
 638     {
 639         /* now broadcast everything to the non-master nodes/threads: */
 640         init_parallel(cr, inputrec, &mtop);
 641     }
 642
 643     // Now each rank knows the inputrec that SIMMASTER read and used,
 644     // and (if applicable) cr->nnodes has been assigned the number of
 645     // thread-MPI ranks that have been chosen. The ranks can now all
 646     // run the task-deciding functions and will agree on the result
 647     // without needing to communicate.
 648     //
 649     // TODO Should we do the communication in debug mode to support
 650     // having an assertion?
 651     //
 652     // Note that these variables describe only their own node.
 653     bool useGpuForNonbonded = false;
 654     bool useGpuForPme       = false;
 655     try
 656     {
 657         // It's possible that there are different numbers of GPUs on
 658         // different nodes, which is the user's responsibilty to
 659         // handle. If unsuitable, we will notice that during task
 660         // assignment.
 661         bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
 662         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 663                                                                 emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
 664                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
 665                                                                 gpusWereDetected);
 666         auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
 667         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 668                                                     canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
 669                                                     gpusWereDetected);
 670
 671         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 672         if (pmeRunMode == PmeRunMode::GPU)
 673         {
 674             if (pmeFftTarget == TaskTarget::Cpu)
 675             {
 676                 pmeRunMode = PmeRunMode::Mixed;
 677             }
 678         }
 679         else if (pmeFftTarget == TaskTarget::Gpu)
 680         {
 681             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 682         }
 683     }
 684     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 685
 686     // TODO: Error handling
 687     mdModules->assignOptionsToModules(*inputrec->params, nullptr);
 688
 689     if (fplog != nullptr)
 690     {
 691         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 692         fprintf(fplog, "\n");
 693     }
 694
 695     if (SIMMASTER(cr))
 696     {
 697         /* now make sure the state is initialized and propagated */
 698         set_state_entries(globalState.get(), inputrec);
 699     }
 700
 701     /* NM and TPI parallelize over force/energy calculations, not atoms,
 702      * so we need to initialize and broadcast the global state.
 703      */
 704     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 705     {
 706         if (!MASTER(cr))
 707         {
 708             globalState = compat::make_unique<t_state>();
 709         }
 710         broadcastStateWithoutDynamics(cr, globalState.get());
 711     }
 712
 713     /* A parallel command line option consistency check that we can
 714        only do after any threads have started. */
 715     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 716                      domdecOptions.numCells[YY] > 1 ||
 717                      domdecOptions.numCells[ZZ] > 1 ||
 718                      domdecOptions.numPmeRanks > 0))
 719     {
 720         gmx_fatal(FARGS,
 721                   "The -dd or -npme option request a parallel simulation, "
 722 #if !GMX_MPI
 723                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 724 #else
 725 #if GMX_THREAD_MPI
 726                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 727 #else
 728                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 729 #endif
 730 #endif
 731     }
 732
 733     if (doRerun &&
 734         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 735     {
 736         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 737     }
 738
 739     if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
 740     {
 741         gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
 742     }
 743
 744     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 745     {
 746         if (domdecOptions.numPmeRanks > 0)
 747         {
 748             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 749                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 750         }
 751
 752         domdecOptions.numPmeRanks = 0;
 753     }
 754
 755     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 756     {
 757         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 758          * improve performance with many threads per GPU, since our OpenMP
 759          * scaling is bad, but it's difficult to automate the setup.
 760          */
 761         domdecOptions.numPmeRanks = 0;
 762     }
 763     if (useGpuForPme)
 764     {
 765         if (domdecOptions.numPmeRanks < 0)
 766         {
 767             domdecOptions.numPmeRanks = 0;
 768             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 769         }
 770         else
 771         {
 772             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 773         }
 774     }
 775
 776 #if GMX_FAHCORE
 777     if (MASTER(cr))
 778     {
 779         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 780     }
 781 #endif
 782
 783     /* NMR restraints must be initialized before load_checkpoint,
 784      * since with time averaging the history is added to t_state.
 785      * For proper consistency check we therefore need to extend
 786      * t_state here.
 787      * So the PME-only nodes (if present) will also initialize
 788      * the distance restraints.
 789      */
 790     snew(fcd, 1);
 791
 792     /* This needs to be called before read_checkpoint to extend the state */
 793     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 794
 795     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 796
 797     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 798
 799     ObservablesHistory   observablesHistory = {};
 800
 801     ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions;
 802
 803     if (continuationOptions.startedFromCheckpoint)
 804     {
 805         /* Check if checkpoint file exists before doing continuation.
 806          * This way we can use identical input options for the first and subsequent runs...
 807          */
 808         gmx_bool bReadEkin;
 809
 810         load_checkpoint(opt2fn_master("-cpi", filenames().size(), filenames().data(), cr), &fplog,
 811                         cr, domdecOptions.numCells,
 812                         inputrec, globalState.get(),
 813                         &bReadEkin, &observablesHistory,
 814                         continuationOptions.appendFiles,
 815                         continuationOptions.appendFilesOptionSet,
 816                         mdrunOptions.reproducible);
 817
 818         if (bReadEkin)
 819         {
 820             continuationOptions.haveReadEkin = true;
 821         }
 822     }
 823
 824     if (SIMMASTER(cr) && continuationOptions.appendFiles)
 825     {
 826         gmx_log_append(cr->nodeid, cr->nnodes, fplog);
 827         logOwner = buildLogger(fplog, nullptr);
 828         mdlog    = logOwner.logger();
 829     }
 830
 831     if (mdrunOptions.numStepsCommandline > -2)
 832     {
 833         GMX_LOG(mdlog.info).asParagraph().
 834             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
 835                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
 836     }
 837     /* override nsteps with value set on the commamdline */
 838     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
 839
 840     if (SIMMASTER(cr))
 841     {
 842         copy_mat(globalState->box, box);
 843     }
 844
 845     if (PAR(cr))
 846     {
 847         gmx_bcast(sizeof(box), box, cr);
 848     }
 849
 850     /* Update rlist and nstlist. */
 851     if (inputrec->cutoff_scheme == ecutsVERLET)
 852     {
 853         prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
 854                               useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
 855     }
 856
 857     LocalAtomSetManager atomSets;
 858
 859     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
 860                      inputrec->eI == eiNM))
 861     {
 862         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
 863                                            &mtop, inputrec,
 864                                            box, positionsFromStatePointer(globalState.get()),
 865                                            &atomSets);
 866         // Note that local state still does not exist yet.
 867     }
 868     else
 869     {
 870         /* PME, if used, is done on all nodes with 1D decomposition */
 871         cr->npmenodes = 0;
 872         cr->duty      = (DUTY_PP | DUTY_PME);
 873
 874         if (inputrec->ePBC == epbcSCREW)
 875         {
 876             gmx_fatal(FARGS,
 877                       "pbc=screw is only implemented with domain decomposition");
 878         }
 879     }
 880
 881     if (PAR(cr))
 882     {
 883         /* After possible communicator splitting in make_dd_communicators.
 884          * we can set up the intra/inter node communication.
 885          */
 886         gmx_setup_nodecomm(fplog, cr);
 887     }
 888
 889 #if GMX_MPI
 890     if (isMultiSim(ms))
 891     {
 892         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 893                 "This is simulation %d out of %d running as a composite GROMACS\n"
 894                 "multi-simulation job. Setup for this simulation:\n",
 895                 ms->sim, ms->nsim);
 896     }
 897     GMX_LOG(mdlog.warning).appendTextFormatted(
 898             "Using %d MPI %s\n",
 899             cr->nnodes,
 900 #if GMX_THREAD_MPI
 901             cr->nnodes == 1 ? "thread" : "threads"
 902 #else
 903             cr->nnodes == 1 ? "process" : "processes"
 904 #endif
 905             );
 906     fflush(stderr);
 907 #endif
 908
 909     /* Check and update hw_opt for the cut-off scheme */
 910     check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
 911
 912     /* Check and update the number of OpenMP threads requested */
 913     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
 914                                             pmeRunMode, mtop);
 915
 916     gmx_omp_nthreads_init(mdlog, cr,
 917                           hwinfo->nthreads_hw_avail,
 918                           physicalNodeComm.size_,
 919                           hw_opt.nthreads_omp,
 920                           hw_opt.nthreads_omp_pme,
 921                           !thisRankHasDuty(cr, DUTY_PP),
 922                           inputrec->cutoff_scheme == ecutsVERLET);
 923
 924     // Enable FP exception detection for the Verlet scheme, but not in
 925     // Release mode and not for compilers with known buggy FP
 926     // exception support (clang with any optimization) or suspected
 927     // buggy FP exception support (gcc 7.* with optimization).
 928 #if !defined NDEBUG && \
 929     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
 930     && defined __OPTIMIZE__)
 931     const bool bEnableFPE = inputrec->cutoff_scheme == ecutsVERLET;
 932 #else
 933     const bool bEnableFPE = false;
 934 #endif
 935     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
 936     if (bEnableFPE)
 937     {
 938         gmx_feenableexcept();
 939     }
 940
 941     // Build a data structure that expresses which kinds of non-bonded
 942     // task are handled by this rank.
 943     //
 944     // TODO Later, this might become a loop over all registered modules
 945     // relevant to the mdp inputs, to find those that have such tasks.
 946     //
 947     // TODO This could move before init_domain_decomposition() as part
 948     // of refactoring that separates the responsibility for duty
 949     // assignment from setup for communication between tasks, and
 950     // setup for tasks handled with a domain (ie including short-ranged
 951     // tasks, bonded tasks, etc.).
 952     //
 953     // Note that in general useGpuForNonbonded, etc. can have a value
 954     // that is inconsistent with the presence of actual GPUs on any
 955     // rank, and that is not known to be a problem until the
 956     // duty of the ranks on a node become node.
 957     //
 958     // TODO Later we might need the concept of computeTasksOnThisRank,
 959     // from which we construct gpuTasksOnThisRank.
 960     //
 961     // Currently the DD code assigns duty to ranks that can
 962     // include PP work that currently can be executed on a single
 963     // GPU, if present and compatible.  This has to be coordinated
 964     // across PP ranks on a node, with possible multiple devices
 965     // or sharing devices on a node, either from the user
 966     // selection, or automatically.
 967     auto                 haveGpus = !gpuIdsToUse.empty();
 968     std::vector<GpuTask> gpuTasksOnThisRank;
 969     if (thisRankHasDuty(cr, DUTY_PP))
 970     {
 971         if (useGpuForNonbonded)
 972         {
 973             if (haveGpus)
 974             {
 975                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
 976             }
 977             else if (nonbondedTarget == TaskTarget::Gpu)
 978             {
 979                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
 980             }
 981         }
 982     }
 983     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
 984     if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
 985     {
 986         if (useGpuForPme)
 987         {
 988             if (haveGpus)
 989             {
 990                 gpuTasksOnThisRank.push_back(GpuTask::Pme);
 991             }
 992             else if (pmeTarget == TaskTarget::Gpu)
 993             {
 994                 gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected.");
 995             }
 996         }
 997     }
 998
 999     GpuTaskAssignment gpuTaskAssignment;
1000     try
1001     {
1002         // Produce the task assignment for this rank.
1003         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
1004                                               mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank);
1005     }
1006     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1007
1008     /* Prevent other ranks from continuing after an issue was found
1009      * and reported as a fatal error.
1010      *
1011      * TODO This function implements a barrier so that MPI runtimes
1012      * can organize an orderly shutdown if one of the ranks has had to
1013      * issue a fatal error in various code already run. When we have
1014      * MPI-aware error handling and reporting, this should be
1015      * improved. */
1016 #if GMX_MPI
1017     if (PAR(cr))
1018     {
1019         MPI_Barrier(cr->mpi_comm_mysim);
1020     }
1021     if (isMultiSim(ms))
1022     {
1023         if (SIMMASTER(cr))
1024         {
1025             MPI_Barrier(ms->mpi_comm_masters);
1026         }
1027         /* We need another barrier to prevent non-master ranks from contiuing
1028          * when an error occured in a different simulation.
1029          */
1030         MPI_Barrier(cr->mpi_comm_mysim);
1031     }
1032 #endif
1033
1034     /* Now that we know the setup is consistent, check for efficiency */
1035     check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
1036                                        cr, mdlog);
1037
1038     gmx_device_info_t *nonbondedDeviceInfo = nullptr;
1039
1040     if (thisRankHasDuty(cr, DUTY_PP))
1041     {
1042         // This works because only one task of each type is currently permitted.
1043         auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
1044                                              hasTaskType<GpuTask::Nonbonded>);
1045         if (nbGpuTaskMapping != gpuTaskAssignment.end())
1046         {
1047             int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
1048             nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
1049             init_gpu(nonbondedDeviceInfo);
1050
1051             if (DOMAINDECOMP(cr))
1052             {
1053                 /* When we share GPUs over ranks, we need to know this for the DLB */
1054                 dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
1055             }
1056
1057         }
1058     }
1059
1060     std::unique_ptr<ClfftInitializer> initializedClfftLibrary;
1061
1062     gmx_device_info_t                *pmeDeviceInfo = nullptr;
1063     // Later, this program could contain kernels that might be later
1064     // re-used as auto-tuning progresses, or subsequent simulations
1065     // are invoked.
1066     PmeGpuProgramStorage pmeGpuProgram;
1067     // This works because only one task of each type is currently permitted.
1068     auto                 pmeGpuTaskMapping     = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
1069     const bool           thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
1070     if (thisRankHasPmeGpuTask)
1071     {
1072         pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
1073         init_gpu(pmeDeviceInfo);
1074         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1075         // TODO It would be nice to move this logic into the factory
1076         // function. See Redmine #2535.
1077         bool isMasterThread = !GMX_THREAD_MPI || MASTER(cr);
1078         if (pmeRunMode == PmeRunMode::GPU && !initializedClfftLibrary && isMasterThread)
1079         {
1080             initializedClfftLibrary = initializeClfftLibrary();
1081         }
1082     }
1083
1084     /* getting number of PP/PME threads
1085        PME: env variable should be read only on one node to make sure it is
1086        identical everywhere;
1087      */
1088     nthreads_pme = gmx_omp_nthreads_get(emntPME);
1089
1090     int numThreadsOnThisRank;
1091     /* threads on this MPI process or TMPI thread */
1092     if (thisRankHasDuty(cr, DUTY_PP))
1093     {
1094         numThreadsOnThisRank = gmx_omp_nthreads_get(emntNonbonded);
1095     }
1096     else
1097     {
1098         numThreadsOnThisRank = nthreads_pme;
1099     }
1100
1101     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1102                                   *hwinfo->hardwareTopology,
1103                                   physicalNodeComm, mdlog);
1104
1105     if (hw_opt.thread_affinity != threadaffOFF)
1106     {
1107         /* Before setting affinity, check whether the affinity has changed
1108          * - which indicates that probably the OpenMP library has changed it
1109          * since we first checked).
1110          */
1111         gmx_check_thread_affinity_set(mdlog, cr,
1112                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1113
1114         int numThreadsOnThisNode, intraNodeThreadOffset;
1115         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1116                                  &intraNodeThreadOffset);
1117
1118         /* Set the CPU affinity */
1119         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1120                                 numThreadsOnThisRank, numThreadsOnThisNode,
1121                                 intraNodeThreadOffset, nullptr);
1122     }
1123
1124     if (mdrunOptions.timingOptions.resetStep > -1)
1125     {
1126         GMX_LOG(mdlog.info).asParagraph().
1127             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1128     }
1129     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1130
1131     if (PAR(cr))
1132     {
1133         /* Master synchronizes its value of reset_counters with all nodes
1134          * including PME only nodes */
1135         reset_counters = wcycle_get_reset_counters(wcycle);
1136         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1137         wcycle_set_reset_counters(wcycle, reset_counters);
1138     }
1139
1140     // Membrane embedding must be initialized before we call init_forcerec()
1141     if (doMembed)
1142     {
1143         if (MASTER(cr))
1144         {
1145             fprintf(stderr, "Initializing membed");
1146         }
1147         /* Note that membed cannot work in parallel because mtop is
1148          * changed here. Fix this if we ever want to make it run with
1149          * multiple ranks. */
1150         membed = init_membed(fplog, filenames().size(), filenames().data(), &mtop, inputrec, globalState.get(), cr,
1151                              &mdrunOptions
1152                                  .checkpointOptions.period);
1153     }
1154
1155     std::unique_ptr<MDAtoms>     mdAtoms;
1156     std::unique_ptr<gmx_vsite_t> vsite;
1157
1158     snew(nrnb, 1);
1159     if (thisRankHasDuty(cr, DUTY_PP))
1160     {
1161         /* Initiate forcerecord */
1162         fr                 = mk_forcerec();
1163         fr->forceProviders = mdModules->initForceProviders();
1164         init_forcerec(fplog, mdlog, fr, fcd,
1165                       inputrec, &mtop, cr, box,
1166                       opt2fn("-table", filenames().size(), filenames().data()),
1167                       opt2fn("-tablep", filenames().size(), filenames().data()),
1168                       opt2fns("-tableb", filenames().size(), filenames().data()),
1169                       *hwinfo, nonbondedDeviceInfo,
1170                       FALSE,
1171                       pforce);
1172
1173         /* Initialize QM-MM */
1174         if (fr->bQMMM)
1175         {
1176             GMX_LOG(mdlog.info).asParagraph().
1177                 appendText("Large parts of the QM/MM support is deprecated, and may be removed in a future "
1178                            "version. Please get in touch with the developers if you find the support useful, "
1179                            "as help is needed if the functionality is to continue to be available.");
1180             init_QMMMrec(cr, &mtop, inputrec, fr);
1181         }
1182
1183         /* Initialize the mdAtoms structure.
1184          * mdAtoms is not filled with atom data,
1185          * as this can not be done now with domain decomposition.
1186          */
1187         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1188         if (globalState && thisRankHasPmeGpuTask)
1189         {
1190             // The pinning of coordinates in the global state object works, because we only use
1191             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1192             // points to the global state object without DD.
1193             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1194             // which should also perform the pinning.
1195             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1196         }
1197
1198         /* Initialize the virtual site communication */
1199         vsite = initVsite(mtop, cr);
1200
1201         calc_shifts(box, fr->shift_vec);
1202
1203         /* With periodic molecules the charge groups should be whole at start up
1204          * and the virtual sites should not be far from their proper positions.
1205          */
1206         if (!inputrec->bContinuation && MASTER(cr) &&
1207             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1208         {
1209             /* Make molecules whole at start of run */
1210             if (fr->ePBC != epbcNONE)
1211             {
1212                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1213             }
1214             if (vsite)
1215             {
1216                 /* Correct initial vsite positions are required
1217                  * for the initial distribution in the domain decomposition
1218                  * and for the initial shell prediction.
1219                  */
1220                 constructVsitesGlobal(mtop, globalState->x);
1221             }
1222         }
1223
1224         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1225         {
1226             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1227             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1228         }
1229     }
1230     else
1231     {
1232         /* This is a PME only node */
1233
1234         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1235
1236         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1237         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1238     }
1239
1240     gmx_pme_t *sepPmeData = nullptr;
1241     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1242     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1243     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1244
1245     /* Initiate PME if necessary,
1246      * either on all nodes or on dedicated PME nodes only. */
1247     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1248     {
1249         if (mdAtoms && mdAtoms->mdatoms())
1250         {
1251             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1252             if (EVDW_PME(inputrec->vdwtype))
1253             {
1254                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1255             }
1256         }
1257         if (cr->npmenodes > 0)
1258         {
1259             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1260             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1261             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1262         }
1263
1264         if (thisRankHasDuty(cr, DUTY_PME))
1265         {
1266             try
1267             {
1268                 pmedata = gmx_pme_init(cr,
1269                                        getNumPmeDomains(cr->dd),
1270                                        inputrec,
1271                                        mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0,
1272                                        mdrunOptions.reproducible,
1273                                        ewaldcoeff_q, ewaldcoeff_lj,
1274                                        nthreads_pme,
1275                                        pmeRunMode, nullptr,
1276                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1277             }
1278             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1279         }
1280     }
1281
1282
1283     if (EI_DYNAMICS(inputrec->eI))
1284     {
1285         /* Turn on signal handling on all nodes */
1286         /*
1287          * (A user signal from the PME nodes (if any)
1288          * is communicated to the PP nodes.
1289          */
1290         signal_handler_install();
1291     }
1292
1293     if (thisRankHasDuty(cr, DUTY_PP))
1294     {
1295         /* Assumes uniform use of the number of OpenMP threads */
1296         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1297
1298         if (inputrec->bPull)
1299         {
1300             /* Initialize pull code */
1301             inputrec->pull_work =
1302                 init_pull(fplog, inputrec->pull, inputrec,
1303                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1304             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1305             {
1306                 init_pull_output_files(inputrec->pull_work,
1307                                        filenames().size(), filenames().data(), oenv,
1308                                        continuationOptions);
1309             }
1310         }
1311
1312         std::unique_ptr<EnforcedRotation> enforcedRotation;
1313         if (inputrec->bRot)
1314         {
1315             /* Initialize enforced rotation code */
1316             enforcedRotation = init_rot(fplog,
1317                                         inputrec,
1318                                         filenames().size(),
1319                                         filenames().data(),
1320                                         cr,
1321                                         &atomSets,
1322                                         globalState.get(),
1323                                         &mtop,
1324                                         oenv,
1325                                         mdrunOptions);
1326         }
1327
1328         if (inputrec->eSwapCoords != eswapNO)
1329         {
1330             /* Initialize ion swapping code */
1331             init_swapcoords(fplog, inputrec, opt2fn_master("-swap", filenames().size(), filenames().data(), cr),
1332                             &mtop, globalState.get(), &observablesHistory,
1333                             cr, &atomSets, oenv, mdrunOptions);
1334         }
1335
1336         /* Let makeConstraints know whether we have essential dynamics constraints.
1337          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1338          */
1339         bool doEssentialDynamics = (opt2fn_null("-ei", filenames().size(), filenames().data()) != nullptr
1340                                     || observablesHistory.edsamHistory);
1341         auto constr              = makeConstraints(mtop, *inputrec, doEssentialDynamics,
1342                                                    fplog, *mdAtoms->mdatoms(),
1343                                                    cr, *ms, nrnb, wcycle, fr->bMolPBC);
1344
1345         if (DOMAINDECOMP(cr))
1346         {
1347             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1348             /* This call is not included in init_domain_decomposition mainly
1349              * because fr->cginfo_mb is set later.
1350              */
1351             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1352                             domdecOptions.checkBondedInteractions,
1353                             fr->cginfo_mb);
1354         }
1355
1356         /* Create StopHandlerBuilder (could be moved earlier if needed - currently nobody register here */
1357         auto stopHandlerBuilder = compat::make_unique<StopHandlerBuilder>();
1358
1359         /* Now do whatever the user wants us to do (how flexible...) */
1360         Integrator integrator {
1361             fplog, cr, ms, mdlog, static_cast<int>(filenames().size()), filenames().data(),
1362             oenv,
1363             mdrunOptions,
1364             vsite.get(), constr.get(),
1365             enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1366             deform.get(),
1367             mdModules->outputProvider(),
1368             inputrec, &mtop,
1369             fcd,
1370             globalState.get(),
1371             &observablesHistory,
1372             mdAtoms.get(), nrnb, wcycle, fr,
1373             replExParams,
1374             membed,
1375             walltime_accounting,
1376             std::move(stopHandlerBuilder)
1377         };
1378         integrator.run(inputrec->eI, doRerun);
1379
1380         if (inputrec->bPull)
1381         {
1382             finish_pull(inputrec->pull_work);
1383         }
1384
1385     }
1386     else
1387     {
1388         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1389         /* do PME only */
1390         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1391         gmx_pmeonly(pmedata, cr, nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1392     }
1393
1394     wallcycle_stop(wcycle, ewcRUN);
1395
1396     /* Finish up, write some stuff
1397      * if rerunMD, don't write last frame again
1398      */
1399     finish_run(fplog, mdlog, cr,
1400                inputrec, nrnb, wcycle, walltime_accounting,
1401                fr ? fr->nbv : nullptr,
1402                pmedata,
1403                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1404
1405     // Free PME data
1406     if (pmedata)
1407     {
1408         gmx_pme_destroy(pmedata);
1409         pmedata = nullptr;
1410     }
1411
1412     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1413     // before we destroy the GPU context(s) in free_gpu_resources().
1414     // Pinned buffers are associated with contexts in CUDA.
1415     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1416     mdAtoms.reset(nullptr);
1417     globalState.reset(nullptr);
1418     mdModules.reset(nullptr);   // destruct force providers here as they might also use the GPU
1419
1420     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1421     free_gpu_resources(fr, physicalNodeComm);
1422     free_gpu(nonbondedDeviceInfo);
1423     free_gpu(pmeDeviceInfo);
1424     done_forcerec(fr, mtop.molblock.size(), mtop.groups.grps[egcENER].nr);
1425     sfree(fcd);
1426
1427     if (doMembed)
1428     {
1429         free_membed(membed);
1430     }
1431
1432     gmx_hardware_info_free();
1433
1434     /* Does what it says */
1435     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1436     walltime_accounting_destroy(walltime_accounting);
1437     sfree(nrnb);
1438
1439     /* Close logfile already here if we were appending to it */
1440     if (MASTER(cr) && continuationOptions.appendFiles)
1441     {
1442         gmx_log_close(fplog);
1443         fplog = nullptr;
1444     }
1445
1446     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1447      * exceptions were enabled before function was called. */
1448     if (bEnableFPE)
1449     {
1450         gmx_fedisableexcept();
1451     }
1452
1453     rc = static_cast<int>(gmx_get_stop_condition());
1454
1455 #if GMX_THREAD_MPI
1456     /* we need to join all threads. The sub-threads join when they
1457        exit this function, but the master thread needs to be told to
1458        wait for that. */
1459     if (PAR(cr) && MASTER(cr))
1460     {
1461         done_commrec(cr);
1462         tMPI_Finalize();
1463     }
1464 #endif
1465
1466     return rc;
1467 }
1468
1469 Mdrunner::~Mdrunner() = default;
1470
1471 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1472
1473 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1474 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1475
1476 class Mdrunner::BuilderImplementation
1477 {
1478     public:
1479         BuilderImplementation() = delete;
1480         explicit BuilderImplementation(SimulationContext* context);
1481         ~BuilderImplementation();
1482
1483         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1484                                                     real                forceWarningThreshold);
1485
1486         void addDomdec(const DomdecOptions &options);
1487
1488         void addVerletList(int nstlist);
1489
1490         void addReplicaExchange(const ReplicaExchangeParameters &params);
1491
1492         void addMultiSim(gmx_multisim_t* multisim);
1493
1494         void addNonBonded(const char* nbpu_opt);
1495
1496         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1497
1498         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1499
1500         void addFilenames(const MdFilenames &filenames);
1501
1502         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1503
1504         void addLogFile(FILE** logFileHandle);
1505
1506         Mdrunner build();
1507
1508     private:
1509         // Default parameters copied from runner.h
1510         // \todo Clarify source(s) of default parameters.
1511
1512         const char* nbpu_opt_    = nullptr;
1513         const char* pme_opt_     = nullptr;
1514         const char* pme_fft_opt_ = nullptr;
1515
1516         MdrunOptions                          mdrunOptions_;
1517
1518         DomdecOptions                         domdecOptions_;
1519
1520         ReplicaExchangeParameters             replicaExchangeParameters_;
1521
1522         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1523         int         nstlist_ = 0;
1524
1525         //! Non-owning multisim communicator handle.
1526         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1527
1528         //! Print a warning if any force is larger than this (in kJ/mol nm).
1529         real forceWarningThreshold_ = -1;
1530
1531         /*! \brief  Non-owning pointer to SimulationContext (owned and managed by client)
1532          *
1533          * \internal
1534          * \todo Establish robust protocol to make sure resources remain valid.
1535          * SimulationContext will likely be separated into multiple layers for
1536          * different levels of access and different phases of execution. Ref
1537          * https://redmine.gromacs.org/issues/2375
1538          * https://redmine.gromacs.org/issues/2587
1539          */
1540         SimulationContext* context_ = nullptr;
1541
1542         //! \brief Parallelism information.
1543         gmx_hw_opt_t hardwareOptions_;
1544
1545         //! filename options for simulation.
1546         std::unique_ptr<MdFilenames> filenames_ = nullptr;
1547
1548         /*! \brief Handle to output environment.
1549          *
1550          * \todo gmx_output_env_t needs lifetime management.
1551          */
1552         gmx_output_env_t*    outputEnvironment_ = nullptr;
1553
1554         /*! \brief Non-owning handle to MD log file.
1555          *
1556          * \todo Context should own output facilities for client.
1557          * \todo Improve log file handle management.
1558          * \internal
1559          * Code managing the FILE* relies on the ability to set it to
1560          * nullptr to check whether the filehandle has been closed, so the object
1561          * we are pointing to is actually the `FILE*`, not the `FILE`.
1562          */
1563         FILE**                logFile_ = nullptr;
1564 };
1565
1566 Mdrunner::BuilderImplementation::BuilderImplementation(SimulationContext* context) :
1567     context_(context)
1568 {
1569     GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context.");
1570 }
1571
1572 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1573
1574 Mdrunner::BuilderImplementation &
1575 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions &options,
1576                                                       real                forceWarningThreshold)
1577 {
1578     mdrunOptions_          = options;
1579     forceWarningThreshold_ = forceWarningThreshold;
1580     return *this;
1581 }
1582
1583 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1584 {
1585     domdecOptions_ = options;
1586 }
1587
1588 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1589 {
1590     nstlist_ = nstlist;
1591 }
1592
1593 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1594 {
1595     replicaExchangeParameters_ = params;
1596 }
1597
1598 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1599 {
1600     multisim_ = compat::make_unique<gmx_multisim_t*>(multisim);
1601 }
1602
1603 Mdrunner Mdrunner::BuilderImplementation::build()
1604 {
1605     auto newRunner = Mdrunner();
1606
1607     GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context.");
1608
1609     newRunner.mdrunOptions    = mdrunOptions_;
1610     newRunner.domdecOptions   = domdecOptions_;
1611
1612     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1613     newRunner.hw_opt          = hardwareOptions_;
1614
1615     // No invariant to check. This parameter exists to optionally override other behavior.
1616     newRunner.nstlist_cmdline = nstlist_;
1617
1618     newRunner.replExParams    = replicaExchangeParameters_;
1619
1620     // \todo determine an invariant to checkout or establish that all MdFilenames objects are valid
1621     if (filenames_)
1622     {
1623         newRunner.filenames = *filenames_;
1624     }
1625     else
1626     {
1627         GMX_THROW(gmx::APIError("MdrunnerBuilder::addFilenames() is required before build()"));
1628     }
1629
1630     GMX_ASSERT(context_->communicationRecord_, "SimulationContext communications not initialized.");
1631     newRunner.cr = context_->communicationRecord_;
1632
1633     if (multisim_)
1634     {
1635         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1636         newRunner.ms = *multisim_;
1637     }
1638     else
1639     {
1640         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1641     }
1642
1643     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1644     // \todo Update sanity checking when output environment has clearly specified invariants.
1645     // Initialization and default values for oenv are not well specified in the current version.
1646     if (outputEnvironment_)
1647     {
1648         newRunner.oenv  = outputEnvironment_;
1649     }
1650     else
1651     {
1652         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1653     }
1654
1655     /* \todo Responsibility for owning, opening and closing the log file should be consolidated.
1656      * Currently, ownership of the filehandle is unclear and it could be closed
1657      * in multiple places, so we have to keep a pointer-to-pointer in order to
1658      * be able to invalidate it by setting nullptr. However, we lose that connection
1659      * at the following assignment. Near term API functionality will require a
1660      * resolution. Ref https://redmine.gromacs.org/issues/2587 and gmxapi milestone 21
1661      * described with https://redmine.gromacs.org/issues/2585
1662      */
1663     if (logFile_)
1664     {
1665         // We do not check whether the pointed-to pointer is nullptr. nullptr is a valid
1666         // value for Mdrunner::fplog.
1667         newRunner.fplog = *logFile_;
1668     }
1669     else
1670     {
1671         GMX_THROW(gmx::APIError("MdrunnerBuilder::addLogFile() is required before build()"));
1672     }
1673
1674     if (nbpu_opt_)
1675     {
1676         newRunner.nbpu_opt    = nbpu_opt_;
1677     }
1678     else
1679     {
1680         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1681     }
1682
1683     if (pme_opt_ && pme_fft_opt_)
1684     {
1685         newRunner.pme_opt     = pme_opt_;
1686         newRunner.pme_fft_opt = pme_fft_opt_;
1687     }
1688     else
1689     {
1690         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1691     }
1692
1693     return newRunner;
1694 }
1695
1696 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1697 {
1698     nbpu_opt_ = nbpu_opt;
1699 }
1700
1701 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1702                                              const char* pme_fft_opt)
1703 {
1704     pme_opt_     = pme_opt;
1705     pme_fft_opt_ = pme_fft_opt;
1706 }
1707
1708 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1709 {
1710     hardwareOptions_ = hardwareOptions;
1711 }
1712
1713 void Mdrunner::BuilderImplementation::addFilenames(const MdFilenames &filenames)
1714 {
1715     filenames_ = compat::make_unique<MdFilenames>(filenames);
1716 }
1717
1718 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1719 {
1720     outputEnvironment_ = outputEnvironment;
1721 }
1722
1723 void Mdrunner::BuilderImplementation::addLogFile(FILE** logFileHandle)
1724 {
1725     assert(logFileHandle);
1726     logFile_ = logFileHandle;
1727 }
1728
1729 MdrunnerBuilder::MdrunnerBuilder(compat::not_null<SimulationContext*> context) :
1730     impl_ {gmx::compat::make_unique<Mdrunner::BuilderImplementation>(context)}
1731 {
1732 }
1733
1734 MdrunnerBuilder::~MdrunnerBuilder() = default;
1735
1736 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions &options,
1737                                                       real                forceWarningThreshold)
1738 {
1739     impl_->setExtraMdrunOptions(options, forceWarningThreshold);
1740     return *this;
1741 }
1742
1743 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1744 {
1745     impl_->addDomdec(options);
1746     return *this;
1747 }
1748
1749 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1750 {
1751     impl_->addVerletList(nstlist);
1752     return *this;
1753 }
1754
1755 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1756 {
1757     impl_->addReplicaExchange(params);
1758     return *this;
1759 }
1760
1761 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
1762 {
1763     impl_->addMultiSim(multisim);
1764     return *this;
1765 }
1766
1767 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1768 {
1769     impl_->addNonBonded(nbpu_opt);
1770     return *this;
1771 }
1772
1773 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
1774                                                     const char* pme_fft_opt)
1775 {
1776     // The builder method may become more general in the future, but in this version,
1777     // parameters for PME electrostatics are both required and the only parameters
1778     // available.
1779     if (pme_opt && pme_fft_opt)
1780     {
1781         impl_->addPME(pme_opt, pme_fft_opt);
1782     }
1783     else
1784     {
1785         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
1786     }
1787     return *this;
1788 }
1789
1790 Mdrunner MdrunnerBuilder::build()
1791 {
1792     return impl_->build();
1793 }
1794
1795 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1796 {
1797     impl_->addHardwareOptions(hardwareOptions);
1798     return *this;
1799 }
1800
1801 MdrunnerBuilder &MdrunnerBuilder::addFilenames(const MdFilenames &filenames)
1802 {
1803     impl_->addFilenames(filenames);
1804     return *this;
1805 }
1806
1807 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1808 {
1809     impl_->addOutputEnvironment(outputEnvironment);
1810     return *this;
1811 }
1812
1813 MdrunnerBuilder &MdrunnerBuilder::addLogFile(FILE** logFileHandle)
1814 {
1815     impl_->addLogFile(logFileHandle);
1816     return *this;
1817 }
1818
1819 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
1820
1821 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
1822
1823 } // namespace gmx