src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/builder.h"
  61 #include "gromacs/domdec/domdec.h"
  62 #include "gromacs/domdec/domdec_struct.h"
  63 #include "gromacs/domdec/gpuhaloexchange.h"
  64 #include "gromacs/domdec/localatomsetmanager.h"
  65 #include "gromacs/domdec/partition.h"
  66 #include "gromacs/ewald/ewald_utils.h"
  67 #include "gromacs/ewald/pme.h"
  68 #include "gromacs/ewald/pme_gpu_program.h"
  69 #include "gromacs/fileio/checkpoint.h"
  70 #include "gromacs/fileio/gmxfio.h"
  71 #include "gromacs/fileio/oenv.h"
  72 #include "gromacs/fileio/tpxio.h"
  73 #include "gromacs/gmxlib/network.h"
  74 #include "gromacs/gmxlib/nrnb.h"
  75 #include "gromacs/gpu_utils/gpu_utils.h"
  76 #include "gromacs/hardware/cpuinfo.h"
  77 #include "gromacs/hardware/detecthardware.h"
  78 #include "gromacs/hardware/printhardware.h"
  79 #include "gromacs/imd/imd.h"
  80 #include "gromacs/listed_forces/disre.h"
  81 #include "gromacs/listed_forces/gpubonded.h"
  82 #include "gromacs/listed_forces/orires.h"
  83 #include "gromacs/math/functions.h"
  84 #include "gromacs/math/utilities.h"
  85 #include "gromacs/math/vec.h"
  86 #include "gromacs/mdlib/boxdeformation.h"
  87 #include "gromacs/mdlib/broadcaststructs.h"
  88 #include "gromacs/mdlib/calc_verletbuf.h"
  89 #include "gromacs/mdlib/dispersioncorrection.h"
  90 #include "gromacs/mdlib/enerdata_utils.h"
  91 #include "gromacs/mdlib/force.h"
  92 #include "gromacs/mdlib/forcerec.h"
  93 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  94 #include "gromacs/mdlib/makeconstraints.h"
  95 #include "gromacs/mdlib/md_support.h"
  96 #include "gromacs/mdlib/mdatoms.h"
  97 #include "gromacs/mdlib/membed.h"
  98 #include "gromacs/mdlib/qmmm.h"
  99 #include "gromacs/mdlib/sighandler.h"
 100 #include "gromacs/mdlib/stophandler.h"
 101 #include "gromacs/mdrun/mdmodules.h"
 102 #include "gromacs/mdrun/simulationcontext.h"
 103 #include "gromacs/mdrunutility/handlerestart.h"
 104 #include "gromacs/mdrunutility/logging.h"
 105 #include "gromacs/mdrunutility/multisim.h"
 106 #include "gromacs/mdrunutility/printtime.h"
 107 #include "gromacs/mdrunutility/threadaffinity.h"
 108 #include "gromacs/mdtypes/commrec.h"
 109 #include "gromacs/mdtypes/enerdata.h"
 110 #include "gromacs/mdtypes/fcdata.h"
 111 #include "gromacs/mdtypes/group.h"
 112 #include "gromacs/mdtypes/inputrec.h"
 113 #include "gromacs/mdtypes/md_enums.h"
 114 #include "gromacs/mdtypes/mdrunoptions.h"
 115 #include "gromacs/mdtypes/observableshistory.h"
 116 #include "gromacs/mdtypes/simulation_workload.h"
 117 #include "gromacs/mdtypes/state.h"
 118 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 119 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 120 #include "gromacs/nbnxm/nbnxm.h"
 121 #include "gromacs/nbnxm/pairlist_tuning.h"
 122 #include "gromacs/pbcutil/pbc.h"
 123 #include "gromacs/pulling/output.h"
 124 #include "gromacs/pulling/pull.h"
 125 #include "gromacs/pulling/pull_rotation.h"
 126 #include "gromacs/restraint/manager.h"
 127 #include "gromacs/restraint/restraintmdmodule.h"
 128 #include "gromacs/restraint/restraintpotential.h"
 129 #include "gromacs/swap/swapcoords.h"
 130 #include "gromacs/taskassignment/decidegpuusage.h"
 131 #include "gromacs/taskassignment/resourcedivision.h"
 132 #include "gromacs/taskassignment/taskassignment.h"
 133 #include "gromacs/taskassignment/usergpuids.h"
 134 #include "gromacs/timing/gpu_timing.h"
 135 #include "gromacs/timing/wallcycle.h"
 136 #include "gromacs/timing/wallcyclereporting.h"
 137 #include "gromacs/topology/mtop_util.h"
 138 #include "gromacs/trajectory/trajectoryframe.h"
 139 #include "gromacs/utility/basenetwork.h"
 140 #include "gromacs/utility/cstringutil.h"
 141 #include "gromacs/utility/exceptions.h"
 142 #include "gromacs/utility/fatalerror.h"
 143 #include "gromacs/utility/filestream.h"
 144 #include "gromacs/utility/gmxassert.h"
 145 #include "gromacs/utility/gmxmpi.h"
 146 #include "gromacs/utility/keyvaluetree.h"
 147 #include "gromacs/utility/logger.h"
 148 #include "gromacs/utility/loggerbuilder.h"
 149 #include "gromacs/utility/mdmodulenotification.h"
 150 #include "gromacs/utility/physicalnodecommunicator.h"
 151 #include "gromacs/utility/pleasecite.h"
 152 #include "gromacs/utility/programcontext.h"
 153 #include "gromacs/utility/smalloc.h"
 154 #include "gromacs/utility/stringutil.h"
 155
 156 #include "isimulator.h"
 157 #include "replicaexchange.h"
 158 #include "simulatorbuilder.h"
 159
 160 #if GMX_FAHCORE
 161 #include "corewrap.h"
 162 #endif
 163
 164 namespace gmx
 165 {
 166
 167 /*! \brief environment variable to enable GPU P2P communication */
 168 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
 169     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
 170
 171 /*! \brief environment variable to enable GPU buffer operations */
 172 static const bool c_enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 173
 174 /*! \brief Manage any development feature flag variables encountered
 175  *
 176  * The use of dev features indicated by environment variables is
 177  * logged in order to ensure that runs with such features enabled can
 178  * be identified from their log and standard output. Any cross
 179  * dependencies are also checked, and if unsatisfied, a fatal error
 180  * issued.
 181  *
 182  * \param[in]  mdlog        Logger object.
 183  */
 184 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
 185 {
 186     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 187     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
 188     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
 189
 190     if (enableGpuBufOps)
 191     {
 192         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 193                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
 194     }
 195
 196     if (enableGpuHaloExchange)
 197     {
 198         if (!enableGpuBufOps)
 199         {
 200             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
 201         }
 202     }
 203
 204     if (useGpuUpdateConstrain)
 205     {
 206         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 207                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
 208     }
 209
 210 }
 211
 212 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 213  *
 214  * Used to ensure that the master thread does not modify mdrunner during copy
 215  * on the spawned threads. */
 216 static void threadMpiMdrunnerAccessBarrier()
 217 {
 218 #if GMX_THREAD_MPI
 219     MPI_Barrier(MPI_COMM_WORLD);
 220 #endif
 221 }
 222
 223 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 224 {
 225     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 226
 227     // All runners in the same process share a restraint manager resource because it is
 228     // part of the interface to the client code, which is associated only with the
 229     // original thread. Handles to the same resources can be obtained by copy.
 230     {
 231         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 232     }
 233
 234     // Copy members of master runner.
 235     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 236     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 237     newRunner.hw_opt    = hw_opt;
 238     newRunner.filenames = filenames;
 239
 240     newRunner.oenv                = oenv;
 241     newRunner.mdrunOptions        = mdrunOptions;
 242     newRunner.domdecOptions       = domdecOptions;
 243     newRunner.nbpu_opt            = nbpu_opt;
 244     newRunner.pme_opt             = pme_opt;
 245     newRunner.pme_fft_opt         = pme_fft_opt;
 246     newRunner.bonded_opt          = bonded_opt;
 247     newRunner.update_opt          = update_opt;
 248     newRunner.nstlist_cmdline     = nstlist_cmdline;
 249     newRunner.replExParams        = replExParams;
 250     newRunner.pforce              = pforce;
 251     // Give the spawned thread the newly created valid communicator
 252     // for the simulation.
 253     newRunner.communicator        = MPI_COMM_WORLD;
 254     newRunner.ms                  = ms;
 255     newRunner.startingBehavior    = startingBehavior;
 256     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 257
 258     threadMpiMdrunnerAccessBarrier();
 259
 260     return newRunner;
 261 }
 262
 263 /*! \brief The callback used for running on spawned threads.
 264  *
 265  * Obtains the pointer to the master mdrunner object from the one
 266  * argument permitted to the thread-launch API call, copies it to make
 267  * a new runner for this thread, reinitializes necessary data, and
 268  * proceeds to the simulation. */
 269 static void mdrunner_start_fn(const void *arg)
 270 {
 271     try
 272     {
 273         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 274         /* copy the arg list to make sure that it's thread-local. This
 275            doesn't copy pointed-to items, of course; fnm, cr and fplog
 276            are reset in the call below, all others should be const. */
 277         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 278         mdrunner.mdrunner();
 279     }
 280     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 281 }
 282
 283
 284 void Mdrunner::spawnThreads(int numThreadsToLaunch)
 285 {
 286 #if GMX_THREAD_MPI
 287     /* now spawn new threads that start mdrunner_start_fn(), while
 288        the main thread returns. Thread affinity is handled later. */
 289     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 290                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 291     {
 292         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 293     }
 294
 295     // Give the master thread the newly created valid communicator for
 296     // the simulation.
 297     communicator = MPI_COMM_WORLD;
 298     threadMpiMdrunnerAccessBarrier();
 299 #else
 300     GMX_UNUSED_VALUE(numThreadsToLaunch);
 301     GMX_UNUSED_VALUE(mdrunner_start_fn);
 302 #endif
 303 }
 304
 305 }  // namespace gmx
 306
 307 /*! \brief Initialize variables for Verlet scheme simulation */
 308 static void prepare_verlet_scheme(FILE                           *fplog,
 309                                   t_commrec                      *cr,
 310                                   t_inputrec                     *ir,
 311                                   int                             nstlist_cmdline,
 312                                   const gmx_mtop_t               *mtop,
 313                                   const matrix                    box,
 314                                   bool                            makeGpuPairList,
 315                                   const gmx::CpuInfo             &cpuinfo)
 316 {
 317     /* For NVE simulations, we will retain the initial list buffer */
 318     if (EI_DYNAMICS(ir->eI) &&
 319         ir->verletbuf_tol > 0 &&
 320         !(EI_MD(ir->eI) && ir->etc == etcNO))
 321     {
 322         /* Update the Verlet buffer size for the current run setup */
 323
 324         /* Here we assume SIMD-enabled kernels are being used. But as currently
 325          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 326          * and 4x2 gives a larger buffer than 4x4, this is ok.
 327          */
 328         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 329         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 330
 331         const real         rlist_new =
 332             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 333
 334         if (rlist_new != ir->rlist)
 335         {
 336             if (fplog != nullptr)
 337             {
 338                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 339                         ir->rlist, rlist_new,
 340                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 341             }
 342             ir->rlist     = rlist_new;
 343         }
 344     }
 345
 346     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 347     {
 348         gmx_fatal(FARGS, "Can not set nstlist without %s",
 349                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 350     }
 351
 352     if (EI_DYNAMICS(ir->eI))
 353     {
 354         /* Set or try nstlist values */
 355         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 356     }
 357 }
 358
 359 /*! \brief Override the nslist value in inputrec
 360  *
 361  * with value passed on the command line (if any)
 362  */
 363 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 364                                     int64_t              nsteps_cmdline,
 365                                     t_inputrec          *ir)
 366 {
 367     assert(ir);
 368
 369     /* override with anything else than the default -2 */
 370     if (nsteps_cmdline > -2)
 371     {
 372         char sbuf_steps[STEPSTRSIZE];
 373         char sbuf_msg[STRLEN];
 374
 375         ir->nsteps = nsteps_cmdline;
 376         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 377         {
 378             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 379                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 380                     fabs(nsteps_cmdline*ir->delta_t));
 381         }
 382         else
 383         {
 384             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 385                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 386         }
 387
 388         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 389     }
 390     else if (nsteps_cmdline < -2)
 391     {
 392         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 393                   nsteps_cmdline);
 394     }
 395     /* Do nothing if nsteps_cmdline == -2 */
 396 }
 397
 398 namespace gmx
 399 {
 400
 401 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 402  *
 403  * If not, and if a warning may be issued, logs a warning about
 404  * falling back to CPU code. With thread-MPI, only the first
 405  * call to this function should have \c issueWarning true. */
 406 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 407                                                const t_inputrec &ir,
 408                                                bool              issueWarning)
 409 {
 410     bool        gpuIsUseful = true;
 411     std::string warning;
 412
 413     if (ir.opts.ngener - ir.nwall > 1)
 414     {
 415         /* The GPU code does not support more than one energy group.
 416          * If the user requested GPUs explicitly, a fatal error is given later.
 417          */
 418         gpuIsUseful = false;
 419         warning     =
 420             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 421             "For better performance, run on the GPU without energy groups and then do "
 422             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 423     }
 424
 425     if (EI_TPI(ir.eI))
 426     {
 427         gpuIsUseful = false;
 428         warning     = "TPI is not implemented for GPUs.";
 429     }
 430
 431     if (!gpuIsUseful && issueWarning)
 432     {
 433         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 434     }
 435
 436     return gpuIsUseful;
 437 }
 438
 439 //! Initializes the logger for mdrun.
 440 static gmx::LoggerOwner buildLogger(FILE      *fplog,
 441                                     const bool isSimulationMasterRank)
 442 {
 443     gmx::LoggerBuilder builder;
 444     if (fplog != nullptr)
 445     {
 446         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 447     }
 448     if (isSimulationMasterRank)
 449     {
 450         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 451                                 &gmx::TextOutputFile::standardError());
 452     }
 453     return builder.build();
 454 }
 455
 456 //! Make a TaskTarget from an mdrun argument string.
 457 static TaskTarget findTaskTarget(const char *optionString)
 458 {
 459     TaskTarget returnValue = TaskTarget::Auto;
 460
 461     if (strncmp(optionString, "auto", 3) == 0)
 462     {
 463         returnValue = TaskTarget::Auto;
 464     }
 465     else if (strncmp(optionString, "cpu", 3) == 0)
 466     {
 467         returnValue = TaskTarget::Cpu;
 468     }
 469     else if (strncmp(optionString, "gpu", 3) == 0)
 470     {
 471         returnValue = TaskTarget::Gpu;
 472     }
 473     else
 474     {
 475         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 476     }
 477
 478     return returnValue;
 479 }
 480
 481 //! Finish run, aggregate data to print performance info.
 482 static void finish_run(FILE *fplog,
 483                        const gmx::MDLogger &mdlog,
 484                        const t_commrec *cr,
 485                        const t_inputrec *inputrec,
 486                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 487                        gmx_walltime_accounting_t walltime_accounting,
 488                        nonbonded_verlet_t *nbv,
 489                        const gmx_pme_t *pme,
 490                        gmx_bool bWriteStat)
 491 {
 492     double  delta_t  = 0;
 493     double  nbfs     = 0, mflop = 0;
 494     double  elapsed_time,
 495             elapsed_time_over_all_ranks,
 496             elapsed_time_over_all_threads,
 497             elapsed_time_over_all_threads_over_all_ranks;
 498     /* Control whether it is valid to print a report. Only the
 499        simulation master may print, but it should not do so if the run
 500        terminated e.g. before a scheduled reset step. This is
 501        complicated by the fact that PME ranks are unaware of the
 502        reason why they were sent a pmerecvqxFINISH. To avoid
 503        communication deadlocks, we always do the communication for the
 504        report, even if we've decided not to write the report, because
 505        how long it takes to finish the run is not important when we've
 506        decided not to report on the simulation performance.
 507
 508        Further, we only report performance for dynamical integrators,
 509        because those are the only ones for which we plan to
 510        consider doing any optimizations. */
 511     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 512
 513     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 514     {
 515         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 516         printReport = false;
 517     }
 518
 519     t_nrnb                  *nrnb_tot;
 520     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
 521     if (cr->nnodes > 1)
 522     {
 523         nrnbTotalStorage = std::make_unique<t_nrnb>();
 524         nrnb_tot         = nrnbTotalStorage.get();
 525 #if GMX_MPI
 526         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 527                       cr->mpi_comm_mysim);
 528 #endif
 529     }
 530     else
 531     {
 532         nrnb_tot = nrnb;
 533     }
 534
 535     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 536     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 537     if (cr->nnodes > 1)
 538     {
 539 #if GMX_MPI
 540         /* reduce elapsed_time over all MPI ranks in the current simulation */
 541         MPI_Allreduce(&elapsed_time,
 542                       &elapsed_time_over_all_ranks,
 543                       1, MPI_DOUBLE, MPI_SUM,
 544                       cr->mpi_comm_mysim);
 545         elapsed_time_over_all_ranks /= cr->nnodes;
 546         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 547          * current simulation. */
 548         MPI_Allreduce(&elapsed_time_over_all_threads,
 549                       &elapsed_time_over_all_threads_over_all_ranks,
 550                       1, MPI_DOUBLE, MPI_SUM,
 551                       cr->mpi_comm_mysim);
 552 #endif
 553     }
 554     else
 555     {
 556         elapsed_time_over_all_ranks                  = elapsed_time;
 557         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 558     }
 559
 560     if (printReport)
 561     {
 562         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 563     }
 564
 565     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 566     {
 567         print_dd_statistics(cr, inputrec, fplog);
 568     }
 569
 570     /* TODO Move the responsibility for any scaling by thread counts
 571      * to the code that handled the thread region, so that there's a
 572      * mechanism to keep cycle counting working during the transition
 573      * to task parallelism. */
 574     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 575     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 576     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 577     auto cycle_sum(wallcycle_sum(cr, wcycle));
 578
 579     if (printReport)
 580     {
 581         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 582         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 583
 584         if (pme_gpu_task_enabled(pme))
 585         {
 586             pme_gpu_get_timings(pme, &pme_gpu_timings);
 587         }
 588         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 589                         elapsed_time_over_all_ranks,
 590                         wcycle, cycle_sum,
 591                         nbnxn_gpu_timings,
 592                         &pme_gpu_timings);
 593
 594         if (EI_DYNAMICS(inputrec->eI))
 595         {
 596             delta_t = inputrec->delta_t;
 597         }
 598
 599         if (fplog)
 600         {
 601             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 602                        elapsed_time_over_all_ranks,
 603                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 604                        delta_t, nbfs, mflop);
 605         }
 606         if (bWriteStat)
 607         {
 608             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 609                        elapsed_time_over_all_ranks,
 610                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 611                        delta_t, nbfs, mflop);
 612         }
 613     }
 614 }
 615
 616 int Mdrunner::mdrunner()
 617 {
 618     matrix                    box;
 619     t_forcerec               *fr               = nullptr;
 620     t_fcdata                 *fcd              = nullptr;
 621     real                      ewaldcoeff_q     = 0;
 622     real                      ewaldcoeff_lj    = 0;
 623     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 624     gmx_wallcycle_t           wcycle;
 625     gmx_walltime_accounting_t walltime_accounting = nullptr;
 626     gmx_membed_t *            membed              = nullptr;
 627     gmx_hw_info_t            *hwinfo              = nullptr;
 628
 629     /* CAUTION: threads may be started later on in this function, so
 630        cr doesn't reflect the final parallel state right now */
 631     gmx_mtop_t                      mtop;
 632
 633     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 634     bool doRerun  = mdrunOptions.rerun;
 635
 636     // Handle task-assignment related user options.
 637     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 638                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 639
 640     std::vector<int> userGpuTaskAssignment;
 641     try
 642     {
 643         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 644     }
 645     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 646     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 647     auto       pmeTarget       = findTaskTarget(pme_opt);
 648     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 649     auto       bondedTarget    = findTaskTarget(bonded_opt);
 650     auto       updateTarget    = findTaskTarget(update_opt);
 651     PmeRunMode pmeRunMode      = PmeRunMode::None;
 652
 653     FILE      *fplog = nullptr;
 654     // If we are appending, we don't write log output because we need
 655     // to check that the old log file matches what the checkpoint file
 656     // expects. Otherwise, we should start to write log output now if
 657     // there is a file ready for it.
 658     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 659     {
 660         fplog = gmx_fio_getfp(logFileHandle);
 661     }
 662     const bool       isSimulationMasterRank = findIsSimulationMasterRank(ms, communicator);
 663     gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
 664     gmx::MDLogger    mdlog(logOwner.logger());
 665
 666     // report any development features that may be enabled by environment variables
 667     manageDevelopmentFeatures(mdlog);
 668
 669     // TODO The thread-MPI master rank makes a working
 670     // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
 671     // after the threads have been launched. This works because no use
 672     // is made of that communicator until after the execution paths
 673     // have rejoined. But it is likely that we can improve the way
 674     // this is expressed, e.g. by expressly running detection only the
 675     // master rank for thread-MPI, rather than relying on the mutex
 676     // and reference count.
 677     PhysicalNodeCommunicator physicalNodeComm(communicator, gmx_physicalnode_id_hash());
 678     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 679
 680     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
 681
 682     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 683
 684     // Print citation requests after all software/hardware printing
 685     pleaseCiteGromacs(fplog);
 686
 687     // TODO Replace this by unique_ptr once t_inputrec is C++
 688     t_inputrec               inputrecInstance;
 689     t_inputrec              *inputrec = nullptr;
 690     std::unique_ptr<t_state> globalState;
 691
 692     auto                     partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
 693
 694     if (isSimulationMasterRank)
 695     {
 696         /* Only the master rank has the global state */
 697         globalState = std::make_unique<t_state>();
 698
 699         /* Read (nearly) all data required for the simulation
 700          * and keep the partly serialized tpr contents to send to other ranks later
 701          */
 702         *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), &inputrecInstance, globalState.get(), &mtop);
 703         inputrec                = &inputrecInstance;
 704     }
 705
 706     /* Check and update the hardware options for internal consistency */
 707     checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, inputrec);
 708
 709     if (GMX_THREAD_MPI && isSimulationMasterRank)
 710     {
 711         bool useGpuForNonbonded = false;
 712         bool useGpuForPme       = false;
 713         try
 714         {
 715             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 716
 717             // If the user specified the number of ranks, then we must
 718             // respect that, but in default mode, we need to allow for
 719             // the number of GPUs to choose the number of ranks.
 720             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 721             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 722                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 723                     canUseGpuForNonbonded,
 724                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 725                     hw_opt.nthreads_tmpi);
 726             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 727                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 728                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 729
 730         }
 731         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 732
 733         /* Determine how many thread-MPI ranks to start.
 734          *
 735          * TODO Over-writing the user-supplied value here does
 736          * prevent any possible subsequent checks from working
 737          * correctly. */
 738         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 739                                                 &hw_opt,
 740                                                 gpuIdsToUse,
 741                                                 useGpuForNonbonded,
 742                                                 useGpuForPme,
 743                                                 inputrec, &mtop,
 744                                                 mdlog,
 745                                                 doMembed);
 746
 747         // Now start the threads for thread MPI.
 748         spawnThreads(hw_opt.nthreads_tmpi);
 749         // The spawned threads enter mdrunner() and execution of
 750         // master and spawned threads joins at the end of this block.
 751         physicalNodeComm = PhysicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
 752     }
 753
 754     GMX_RELEASE_ASSERT(communicator == MPI_COMM_WORLD, "Must have valid world communicator");
 755     CommrecHandle crHandle = init_commrec(communicator, ms);
 756     t_commrec    *cr       = crHandle.get();
 757     GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
 758
 759     if (PAR(cr))
 760     {
 761         /* now broadcast everything to the non-master nodes/threads: */
 762         if (!isSimulationMasterRank)
 763         {
 764             inputrec = &inputrecInstance;
 765         }
 766         init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
 767     }
 768     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
 769     partialDeserializedTpr.reset(nullptr);
 770
 771     // Now the number of ranks is known to all ranks, and each knows
 772     // the inputrec read by the master rank. The ranks can now all run
 773     // the task-deciding functions and will agree on the result
 774     // without needing to communicate.
 775     //
 776     // TODO Should we do the communication in debug mode to support
 777     // having an assertion?
 778     //
 779     // Note that these variables describe only their own node.
 780     //
 781     // Note that when bonded interactions run on a GPU they always run
 782     // alongside a nonbonded task, so do not influence task assignment
 783     // even though they affect the force calculation workload.
 784     bool useGpuForNonbonded     = false;
 785     bool useGpuForPme           = false;
 786     bool useGpuForBonded        = false;
 787     bool useGpuForUpdate        = false;
 788     bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
 789     try
 790     {
 791         // It's possible that there are different numbers of GPUs on
 792         // different nodes, which is the user's responsibility to
 793         // handle. If unsuitable, we will notice that during task
 794         // assignment.
 795         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 796         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 797                                                                 emulateGpuNonbonded,
 798                                                                 canUseGpuForNonbonded,
 799                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
 800                                                                 gpusWereDetected);
 801         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 802                                                     *hwinfo, *inputrec, mtop,
 803                                                     cr->nnodes, domdecOptions.numPmeRanks,
 804                                                     gpusWereDetected);
 805         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 806         useGpuForBonded =
 807             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 808                                             bondedTarget, canUseGpuForBonded,
 809                                             EVDW_PME(inputrec->vdwtype),
 810                                             EEL_PME_EWALD(inputrec->coulombtype),
 811                                             domdecOptions.numPmeRanks, gpusWereDetected);
 812
 813         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 814         if (pmeRunMode == PmeRunMode::GPU)
 815         {
 816             if (pmeFftTarget == TaskTarget::Cpu)
 817             {
 818                 pmeRunMode = PmeRunMode::Mixed;
 819             }
 820         }
 821         else if (pmeFftTarget == TaskTarget::Gpu)
 822         {
 823             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 824         }
 825     }
 826     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 827
 828     // Build restraints.
 829     // TODO: hide restraint implementation details from Mdrunner.
 830     // There is nothing unique about restraints at this point as far as the
 831     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 832     // factory functions from the SimulationContext on which to call mdModules_->add().
 833     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 834     for (auto && restraint : restraintManager_->getRestraints())
 835     {
 836         auto module = RestraintMDModule::create(restraint,
 837                                                 restraint->sites());
 838         mdModules_->add(std::move(module));
 839     }
 840
 841     // TODO: Error handling
 842     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 843     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
 844
 845     if (inputrec->internalParameters != nullptr)
 846     {
 847         mdModulesNotifier.notify(*inputrec->internalParameters);
 848     }
 849
 850     if (fplog != nullptr)
 851     {
 852         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 853         fprintf(fplog, "\n");
 854     }
 855
 856     if (SIMMASTER(cr))
 857     {
 858         /* In rerun, set velocities to zero if present */
 859         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 860         {
 861             // rerun does not use velocities
 862             GMX_LOG(mdlog.info).asParagraph().appendText(
 863                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 864                     "potential energy and forces. The velocities will be ignored.");
 865             for (int i = 0; i < globalState->natoms; i++)
 866             {
 867                 clear_rvec(globalState->v[i]);
 868             }
 869             globalState->flags &= ~(1 << estV);
 870         }
 871
 872         /* now make sure the state is initialized and propagated */
 873         set_state_entries(globalState.get(), inputrec);
 874     }
 875
 876     /* NM and TPI parallelize over force/energy calculations, not atoms,
 877      * so we need to initialize and broadcast the global state.
 878      */
 879     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 880     {
 881         if (!MASTER(cr))
 882         {
 883             globalState = std::make_unique<t_state>();
 884         }
 885         broadcastStateWithoutDynamics(cr, globalState.get());
 886     }
 887
 888     /* A parallel command line option consistency check that we can
 889        only do after any threads have started. */
 890     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 891                      domdecOptions.numCells[YY] > 1 ||
 892                      domdecOptions.numCells[ZZ] > 1 ||
 893                      domdecOptions.numPmeRanks > 0))
 894     {
 895         gmx_fatal(FARGS,
 896                   "The -dd or -npme option request a parallel simulation, "
 897 #if !GMX_MPI
 898                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 899 #elif GMX_THREAD_MPI
 900                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 901 #else
 902                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 903 #endif
 904     }
 905
 906     if (doRerun &&
 907         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 908     {
 909         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 910     }
 911
 912     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 913     {
 914         if (domdecOptions.numPmeRanks > 0)
 915         {
 916             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 917                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 918         }
 919
 920         domdecOptions.numPmeRanks = 0;
 921     }
 922
 923     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 924     {
 925         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 926          * improve performance with many threads per GPU, since our OpenMP
 927          * scaling is bad, but it's difficult to automate the setup.
 928          */
 929         domdecOptions.numPmeRanks = 0;
 930     }
 931     if (useGpuForPme)
 932     {
 933         if (domdecOptions.numPmeRanks < 0)
 934         {
 935             domdecOptions.numPmeRanks = 0;
 936             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 937         }
 938         else
 939         {
 940             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 941         }
 942     }
 943
 944 #if GMX_FAHCORE
 945     if (MASTER(cr))
 946     {
 947         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 948     }
 949 #endif
 950
 951     /* NMR restraints must be initialized before load_checkpoint,
 952      * since with time averaging the history is added to t_state.
 953      * For proper consistency check we therefore need to extend
 954      * t_state here.
 955      * So the PME-only nodes (if present) will also initialize
 956      * the distance restraints.
 957      */
 958     snew(fcd, 1);
 959
 960     /* This needs to be called before read_checkpoint to extend the state */
 961     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 962
 963     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 964
 965     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 966
 967     ObservablesHistory   observablesHistory = {};
 968
 969     if (startingBehavior != StartingBehavior::NewSimulation)
 970     {
 971         /* Check if checkpoint file exists before doing continuation.
 972          * This way we can use identical input options for the first and subsequent runs...
 973          */
 974         if (mdrunOptions.numStepsCommandline > -2)
 975         {
 976             /* Temporarily set the number of steps to unlimited to avoid
 977              * triggering the nsteps check in load_checkpoint().
 978              * This hack will go away soon when the -nsteps option is removed.
 979              */
 980             inputrec->nsteps = -1;
 981         }
 982
 983         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 984                         logFileHandle,
 985                         cr, domdecOptions.numCells,
 986                         inputrec, globalState.get(),
 987                         &observablesHistory,
 988                         mdrunOptions.reproducible, mdModules_->notifier());
 989
 990         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 991         {
 992             // Now we can start normal logging to the truncated log file.
 993             fplog    = gmx_fio_getfp(logFileHandle);
 994             prepareLogAppending(fplog);
 995             logOwner = buildLogger(fplog, MASTER(cr));
 996             mdlog    = logOwner.logger();
 997         }
 998     }
 999
1000     if (mdrunOptions.numStepsCommandline > -2)
1001     {
1002         GMX_LOG(mdlog.info).asParagraph().
1003             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1004                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1005     }
1006     /* override nsteps with value set on the commandline */
1007     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1008
1009     if (SIMMASTER(cr))
1010     {
1011         copy_mat(globalState->box, box);
1012     }
1013
1014     if (PAR(cr))
1015     {
1016         gmx_bcast(sizeof(box), box, cr);
1017     }
1018
1019     if (inputrec->cutoff_scheme != ecutsVERLET)
1020     {
1021         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1022     }
1023     /* Update rlist and nstlist. */
1024     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1025                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1026
1027     const bool prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
1028     // This builder is necessary while we have multi-part construction
1029     // of DD. Before DD is constructed, we use the existence of
1030     // the builder object to indicate that further construction of DD
1031     // is needed.
1032     std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
1033     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1034                      inputrec->eI == eiNM))
1035     {
1036         ddBuilder = std::make_unique<DomainDecompositionBuilder>
1037                 (mdlog, cr, domdecOptions, mdrunOptions,
1038                 prefer1DAnd1PulseDD, mtop, *inputrec,
1039                 box, positionsFromStatePointer(globalState.get()));
1040     }
1041     else
1042     {
1043         /* PME, if used, is done on all nodes with 1D decomposition */
1044         cr->npmenodes = 0;
1045         cr->duty      = (DUTY_PP | DUTY_PME);
1046
1047         if (inputrec->ePBC == epbcSCREW)
1048         {
1049             gmx_fatal(FARGS,
1050                       "pbc=screw is only implemented with domain decomposition");
1051         }
1052     }
1053
1054     // Produce the task assignment for this rank.
1055     GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
1056     GpuTaskAssignments        gpuTaskAssignments =
1057         gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
1058                                         userGpuTaskAssignment,
1059                                         *hwinfo,
1060                                         cr,
1061                                         ms,
1062                                         physicalNodeComm,
1063                                         nonbondedTarget,
1064                                         pmeTarget,
1065                                         bondedTarget,
1066                                         updateTarget,
1067                                         useGpuForNonbonded,
1068                                         useGpuForPme,
1069                                         thisRankHasDuty(cr, DUTY_PP),
1070                                         // TODO cr->duty & DUTY_PME should imply that a PME
1071                                         // algorithm is active, but currently does not.
1072                                         EEL_PME(inputrec->coulombtype) &&
1073                                         thisRankHasDuty(cr, DUTY_PME));
1074
1075     const bool printHostName = (cr->nnodes > 1);
1076     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
1077
1078     // If the user chose a task assignment, give them some hints
1079     // where appropriate.
1080     if (!userGpuTaskAssignment.empty())
1081     {
1082         gpuTaskAssignments.logPerformanceHints(mdlog,
1083                                                ssize(gpuIdsToUse));
1084     }
1085
1086     // Get the device handles for the modules, nullptr when no task is assigned.
1087     gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
1088     gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
1089
1090     // TODO Initialize GPU streams here.
1091
1092     // TODO Currently this is always built, yet DD partition code
1093     // checks if it is built before using it. Probably it should
1094     // become an MDModule that is made only when another module
1095     // requires it (e.g. pull, CompEl, density fitting), so that we
1096     // don't update the local atom sets unilaterally every step.
1097     LocalAtomSetManager atomSets;
1098     if (ddBuilder)
1099     {
1100         // TODO Pass the GPU streams to ddBuilder to use in buffer
1101         // transfers (e.g. halo exchange)
1102         cr->dd = ddBuilder->build(&atomSets);
1103         // The builder's job is done, so destruct it
1104         ddBuilder.reset(nullptr);
1105         // Note that local state still does not exist yet.
1106     }
1107
1108     if (PAR(cr))
1109     {
1110         /* After possible communicator splitting in make_dd_communicators.
1111          * we can set up the intra/inter node communication.
1112          */
1113         gmx_setup_nodecomm(fplog, cr);
1114     }
1115
1116 #if GMX_MPI
1117     if (isMultiSim(ms))
1118     {
1119         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1120                 "This is simulation %d out of %d running as a composite GROMACS\n"
1121                 "multi-simulation job. Setup for this simulation:\n",
1122                 ms->sim, ms->nsim);
1123     }
1124     GMX_LOG(mdlog.warning).appendTextFormatted(
1125             "Using %d MPI %s\n",
1126             cr->nnodes,
1127 #if GMX_THREAD_MPI
1128             cr->nnodes == 1 ? "thread" : "threads"
1129 #else
1130             cr->nnodes == 1 ? "process" : "processes"
1131 #endif
1132             );
1133     fflush(stderr);
1134 #endif
1135
1136     // If mdrun -pin auto honors any affinity setting that already
1137     // exists. If so, it is nice to provide feedback about whether
1138     // that existing affinity setting was from OpenMP or something
1139     // else, so we run this code both before and after we initialize
1140     // the OpenMP support.
1141     gmx_check_thread_affinity_set(mdlog,
1142                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1143     /* Check and update the number of OpenMP threads requested */
1144     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1145                                             pmeRunMode, mtop, *inputrec);
1146
1147     gmx_omp_nthreads_init(mdlog, cr,
1148                           hwinfo->nthreads_hw_avail,
1149                           physicalNodeComm.size_,
1150                           hw_opt.nthreads_omp,
1151                           hw_opt.nthreads_omp_pme,
1152                           !thisRankHasDuty(cr, DUTY_PP));
1153
1154     // Enable FP exception detection, but not in
1155     // Release mode and not for compilers with known buggy FP
1156     // exception support (clang with any optimization) or suspected
1157     // buggy FP exception support (gcc 7.* with optimization).
1158 #if !defined NDEBUG && \
1159     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1160     && defined __OPTIMIZE__)
1161     const bool bEnableFPE = true;
1162 #else
1163     const bool bEnableFPE = false;
1164 #endif
1165     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1166     if (bEnableFPE)
1167     {
1168         gmx_feenableexcept();
1169     }
1170
1171     /* Now that we know the setup is consistent, check for efficiency */
1172     check_resource_division_efficiency(hwinfo,
1173                                        gpuTaskAssignments.thisRankHasAnyGpuTask(),
1174                                        mdrunOptions.ntompOptionIsSet,
1175                                        cr,
1176                                        mdlog);
1177
1178     /* getting number of PP/PME threads on this MPI / tMPI rank.
1179        PME: env variable should be read only on one node to make sure it is
1180        identical everywhere;
1181      */
1182     const int numThreadsOnThisRank =
1183         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1184     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1185                                   *hwinfo->hardwareTopology,
1186                                   physicalNodeComm, mdlog);
1187
1188     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1189     {
1190         /* Before setting affinity, check whether the affinity has changed
1191          * - which indicates that probably the OpenMP library has changed it
1192          * since we first checked).
1193          */
1194         gmx_check_thread_affinity_set(mdlog,
1195                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1196
1197         int numThreadsOnThisNode, intraNodeThreadOffset;
1198         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1199                                  &intraNodeThreadOffset);
1200
1201         /* Set the CPU affinity */
1202         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1203                                 numThreadsOnThisRank, numThreadsOnThisNode,
1204                                 intraNodeThreadOffset, nullptr);
1205     }
1206
1207     if (mdrunOptions.timingOptions.resetStep > -1)
1208     {
1209         GMX_LOG(mdlog.info).asParagraph().
1210             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1211     }
1212     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1213
1214     if (PAR(cr))
1215     {
1216         /* Master synchronizes its value of reset_counters with all nodes
1217          * including PME only nodes */
1218         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1219         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1220         wcycle_set_reset_counters(wcycle, reset_counters);
1221     }
1222
1223     // Membrane embedding must be initialized before we call init_forcerec()
1224     if (doMembed)
1225     {
1226         if (MASTER(cr))
1227         {
1228             fprintf(stderr, "Initializing membed");
1229         }
1230         /* Note that membed cannot work in parallel because mtop is
1231          * changed here. Fix this if we ever want to make it run with
1232          * multiple ranks. */
1233         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1234                              &mdrunOptions
1235                                  .checkpointOptions.period);
1236     }
1237
1238     const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1239     std::unique_ptr<MDAtoms>     mdAtoms;
1240     std::unique_ptr<gmx_vsite_t> vsite;
1241
1242     t_nrnb nrnb;
1243     if (thisRankHasDuty(cr, DUTY_PP))
1244     {
1245         mdModulesNotifier.notify(*cr);
1246         mdModulesNotifier.notify(&atomSets);
1247         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1248         /* Initiate forcerecord */
1249         fr                 = new t_forcerec;
1250         fr->forceProviders = mdModules_->initForceProviders();
1251         init_forcerec(fplog, mdlog, fr, fcd,
1252                       inputrec, &mtop, cr, box,
1253                       opt2fn("-table", filenames.size(), filenames.data()),
1254                       opt2fn("-tablep", filenames.size(), filenames.data()),
1255                       opt2fns("-tableb", filenames.size(), filenames.data()),
1256                       *hwinfo, nonbondedDeviceInfo,
1257                       useGpuForBonded,
1258                       pforce,
1259                       wcycle);
1260
1261         // TODO Move this to happen during domain decomposition setup,
1262         // once stream and event handling works well with that.
1263         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
1264         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
1265         {
1266             GMX_RELEASE_ASSERT(c_enableGpuBufOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
1267             void *streamLocal                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1268             void *streamNonLocal                =
1269                 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1270             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1271             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1272                     "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
1273             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, streamLocal,
1274                                                                         streamNonLocal, coordinatesOnDeviceEvent);
1275         }
1276
1277         /* Initialize the mdAtoms structure.
1278          * mdAtoms is not filled with atom data,
1279          * as this can not be done now with domain decomposition.
1280          */
1281         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1282         if (globalState && thisRankHasPmeGpuTask)
1283         {
1284             // The pinning of coordinates in the global state object works, because we only use
1285             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1286             // points to the global state object without DD.
1287             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1288             // which should also perform the pinning.
1289             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1290         }
1291
1292         /* Initialize the virtual site communication */
1293         vsite = initVsite(mtop, cr);
1294
1295         calc_shifts(box, fr->shift_vec);
1296
1297         /* With periodic molecules the charge groups should be whole at start up
1298          * and the virtual sites should not be far from their proper positions.
1299          */
1300         if (!inputrec->bContinuation && MASTER(cr) &&
1301             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1302         {
1303             /* Make molecules whole at start of run */
1304             if (fr->ePBC != epbcNONE)
1305             {
1306                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1307             }
1308             if (vsite)
1309             {
1310                 /* Correct initial vsite positions are required
1311                  * for the initial distribution in the domain decomposition
1312                  * and for the initial shell prediction.
1313                  */
1314                 constructVsitesGlobal(mtop, globalState->x);
1315             }
1316         }
1317
1318         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1319         {
1320             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1321             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1322         }
1323     }
1324     else
1325     {
1326         /* This is a PME only node */
1327
1328         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1329
1330         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1331         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1332     }
1333
1334     gmx_pme_t *sepPmeData = nullptr;
1335     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1336     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1337     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1338
1339     // TODO should live in ewald module once its testing is improved
1340     //
1341     // Later, this program could contain kernels that might be later
1342     // re-used as auto-tuning progresses, or subsequent simulations
1343     // are invoked.
1344     PmeGpuProgramStorage pmeGpuProgram;
1345     if (thisRankHasPmeGpuTask)
1346     {
1347         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1348     }
1349
1350     /* Initiate PME if necessary,
1351      * either on all nodes or on dedicated PME nodes only. */
1352     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1353     {
1354         if (mdAtoms && mdAtoms->mdatoms())
1355         {
1356             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1357             if (EVDW_PME(inputrec->vdwtype))
1358             {
1359                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1360             }
1361         }
1362         if (cr->npmenodes > 0)
1363         {
1364             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1365             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1366             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1367         }
1368
1369         if (thisRankHasDuty(cr, DUTY_PME))
1370         {
1371             try
1372             {
1373                 pmedata = gmx_pme_init(cr,
1374                                        getNumPmeDomains(cr->dd),
1375                                        inputrec,
1376                                        nChargePerturbed != 0, nTypePerturbed != 0,
1377                                        mdrunOptions.reproducible,
1378                                        ewaldcoeff_q, ewaldcoeff_lj,
1379                                        gmx_omp_nthreads_get(emntPME),
1380                                        pmeRunMode, nullptr,
1381                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1382             }
1383             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1384         }
1385     }
1386
1387
1388     if (EI_DYNAMICS(inputrec->eI))
1389     {
1390         /* Turn on signal handling on all nodes */
1391         /*
1392          * (A user signal from the PME nodes (if any)
1393          * is communicated to the PP nodes.
1394          */
1395         signal_handler_install();
1396     }
1397
1398     pull_t *pull_work = nullptr;
1399     if (thisRankHasDuty(cr, DUTY_PP))
1400     {
1401         /* Assumes uniform use of the number of OpenMP threads */
1402         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1403
1404         if (inputrec->bPull)
1405         {
1406             /* Initialize pull code */
1407             pull_work =
1408                 init_pull(fplog, inputrec->pull, inputrec,
1409                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1410             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1411             {
1412                 initPullHistory(pull_work, &observablesHistory);
1413             }
1414             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1415             {
1416                 init_pull_output_files(pull_work,
1417                                        filenames.size(), filenames.data(), oenv,
1418                                        startingBehavior);
1419             }
1420         }
1421
1422         std::unique_ptr<EnforcedRotation> enforcedRotation;
1423         if (inputrec->bRot)
1424         {
1425             /* Initialize enforced rotation code */
1426             enforcedRotation = init_rot(fplog,
1427                                         inputrec,
1428                                         filenames.size(),
1429                                         filenames.data(),
1430                                         cr,
1431                                         &atomSets,
1432                                         globalState.get(),
1433                                         &mtop,
1434                                         oenv,
1435                                         mdrunOptions,
1436                                         startingBehavior);
1437         }
1438
1439         t_swap *swap = nullptr;
1440         if (inputrec->eSwapCoords != eswapNO)
1441         {
1442             /* Initialize ion swapping code */
1443             swap = init_swapcoords(fplog, inputrec,
1444                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1445                                    &mtop, globalState.get(), &observablesHistory,
1446                                    cr, &atomSets, oenv, mdrunOptions,
1447                                    startingBehavior);
1448         }
1449
1450         /* Let makeConstraints know whether we have essential dynamics constraints.
1451          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1452          */
1453         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1454                                     || observablesHistory.edsamHistory);
1455         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1456                                                    fplog, *mdAtoms->mdatoms(),
1457                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1458
1459         /* Energy terms and groups */
1460         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1461
1462         /* Kinetic energy data */
1463         gmx_ekindata_t ekind;
1464         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1465
1466         /* Set up interactive MD (IMD) */
1467         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1468                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1469                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1470                                          startingBehavior);
1471
1472         if (DOMAINDECOMP(cr))
1473         {
1474             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1475             /* This call is not included in init_domain_decomposition mainly
1476              * because fr->cginfo_mb is set later.
1477              */
1478             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1479                             domdecOptions.checkBondedInteractions,
1480                             fr->cginfo_mb);
1481         }
1482
1483         if (updateTarget == TaskTarget::Gpu)
1484         {
1485             if (SIMMASTER(cr))
1486             {
1487                 gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
1488                           "of update and constraints to the GPU!");
1489             }
1490         }
1491
1492         // Before we start the actual simulator, try if we can run the update task on the GPU.
1493         useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
1494                                                          useGpuForPme,
1495                                                          useGpuForNonbonded,
1496                                                          c_enableGpuBufOps,
1497                                                          updateTarget,
1498                                                          gpusWereDetected,
1499                                                          *inputrec,
1500                                                          *mdAtoms,
1501                                                          doEssentialDynamics,
1502                                                          fcd->orires.nr != 0,
1503                                                          fcd->disres.nsystems != 0);
1504
1505         const void *commandStream = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasPmeGpuTask) ? pme_gpu_get_device_stream(fr->pmedata) : nullptr;
1506         const void *gpuContext    = ((GMX_GPU == GMX_GPU_OPENCL) && thisRankHasPmeGpuTask) ? pme_gpu_get_device_context(fr->pmedata) : nullptr;
1507         const int   paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
1508
1509         const bool  inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1510                     false,
1511                     inputrec, doRerun, vsite.get(), ms, replExParams,
1512                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1513                     &observablesHistory, membed);
1514
1515         const bool          useModularSimulator = inputIsCompatibleWithModularSimulator && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
1516         GpuApiCallBehavior  transferKind        = (inputrec->eI == eiMD && !doRerun && !useModularSimulator) ? GpuApiCallBehavior::Async : GpuApiCallBehavior::Sync;
1517
1518         // We initialize GPU state even for the CPU runs so we will have a more verbose
1519         // error if someone will try accessing it from the CPU codepath
1520         gmx::StatePropagatorDataGpu stateGpu(commandStream,
1521                                              gpuContext,
1522                                              transferKind,
1523                                              paddingSize);
1524         fr->stateGpu = &stateGpu;
1525
1526         // TODO This is not the right place to manage the lifetime of
1527         // this data structure, but currently it's the easiest way to
1528         // make it work.
1529         MdrunScheduleWorkload runScheduleWork;
1530
1531         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1532         SimulatorBuilder simulatorBuilder;
1533
1534         // build and run simulator object based on user-input
1535         auto simulator = simulatorBuilder.build(
1536                     inputIsCompatibleWithModularSimulator,
1537                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1538                     oenv,
1539                     mdrunOptions,
1540                     startingBehavior,
1541                     vsite.get(), constr.get(),
1542                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1543                     deform.get(),
1544                     mdModules_->outputProvider(),
1545                     mdModules_->notifier(),
1546                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1547                     fcd,
1548                     globalState.get(),
1549                     &observablesHistory,
1550                     mdAtoms.get(), &nrnb, wcycle, fr,
1551                     &enerd,
1552                     &ekind,
1553                     &runScheduleWork,
1554                     replExParams,
1555                     membed,
1556                     walltime_accounting,
1557                     std::move(stopHandlerBuilder_),
1558                     doRerun,
1559                     useGpuForUpdate);
1560         simulator->run();
1561
1562         if (inputrec->bPull)
1563         {
1564             finish_pull(pull_work);
1565         }
1566         finish_swapcoords(swap);
1567     }
1568     else
1569     {
1570         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1571         /* do PME only */
1572         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1573         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1574     }
1575
1576     wallcycle_stop(wcycle, ewcRUN);
1577
1578     /* Finish up, write some stuff
1579      * if rerunMD, don't write last frame again
1580      */
1581     finish_run(fplog, mdlog, cr,
1582                inputrec, &nrnb, wcycle, walltime_accounting,
1583                fr ? fr->nbv.get() : nullptr,
1584                pmedata,
1585                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1586
1587     // clean up cycle counter
1588     wallcycle_destroy(wcycle);
1589
1590 // Free PME data
1591     if (pmedata)
1592     {
1593         gmx_pme_destroy(pmedata);
1594         pmedata = nullptr;
1595     }
1596
1597     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1598     // before we destroy the GPU context(s) in free_gpu_resources().
1599     // Pinned buffers are associated with contexts in CUDA.
1600     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1601     mdAtoms.reset(nullptr);
1602     globalState.reset(nullptr);
1603     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1604
1605     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1606     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1607     free_gpu(nonbondedDeviceInfo);
1608     free_gpu(pmeDeviceInfo);
1609     done_forcerec(fr, mtop.molblock.size());
1610     sfree(fcd);
1611
1612     if (doMembed)
1613     {
1614         free_membed(membed);
1615     }
1616
1617     /* Does what it says */
1618     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1619     walltime_accounting_destroy(walltime_accounting);
1620
1621     // Ensure log file content is written
1622     if (logFileHandle)
1623     {
1624         gmx_fio_flush(logFileHandle);
1625     }
1626
1627     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1628      * exceptions were enabled before function was called. */
1629     if (bEnableFPE)
1630     {
1631         gmx_fedisableexcept();
1632     }
1633
1634     auto rc = static_cast<int>(gmx_get_stop_condition());
1635
1636 #if GMX_THREAD_MPI
1637     /* we need to join all threads. The sub-threads join when they
1638        exit this function, but the master thread needs to be told to
1639        wait for that. */
1640     if (PAR(cr) && MASTER(cr))
1641     {
1642         tMPI_Finalize();
1643     }
1644 #endif
1645     return rc;
1646 }
1647
1648 Mdrunner::~Mdrunner()
1649 {
1650     // Clean up of the Manager.
1651     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1652     // but okay as long as threads synchronize some time before adding or accessing
1653     // a new set of restraints.
1654     if (restraintManager_)
1655     {
1656         restraintManager_->clear();
1657         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1658                    "restraints added during runner life time should be cleared at runner destruction.");
1659     }
1660 };
1661
1662 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1663                             const std::string                        &name)
1664 {
1665     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1666     // Not sure if this should be logged through the md logger or something else,
1667     // but it is helpful to have some sort of INFO level message sent somewhere.
1668     //    std::cout << "Registering restraint named " << name << std::endl;
1669
1670     // When multiple restraints are used, it may be wasteful to register them separately.
1671     // Maybe instead register an entire Restraint Manager as a force provider.
1672     restraintManager_->addToSpec(std::move(puller),
1673                                  name);
1674 }
1675
1676 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1677     : mdModules_(std::move(mdModules))
1678 {
1679 }
1680
1681 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1682
1683 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1684 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1685
1686 class Mdrunner::BuilderImplementation
1687 {
1688     public:
1689         BuilderImplementation() = delete;
1690         BuilderImplementation(std::unique_ptr<MDModules>           mdModules,
1691                               compat::not_null<SimulationContext*> context);
1692         ~BuilderImplementation();
1693
1694         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1695                                                     real                forceWarningThreshold,
1696                                                     StartingBehavior    startingBehavior);
1697
1698         void addDomdec(const DomdecOptions &options);
1699
1700         void addVerletList(int nstlist);
1701
1702         void addReplicaExchange(const ReplicaExchangeParameters &params);
1703
1704         void addNonBonded(const char* nbpu_opt);
1705
1706         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1707
1708         void addBondedTaskAssignment(const char* bonded_opt);
1709
1710         void addUpdateTaskAssignment(const char* update_opt);
1711
1712         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1713
1714         void addFilenames(ArrayRef <const t_filenm> filenames);
1715
1716         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1717
1718         void addLogFile(t_fileio *logFileHandle);
1719
1720         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1721
1722         Mdrunner build();
1723
1724     private:
1725
1726         // Default parameters copied from runner.h
1727         // \todo Clarify source(s) of default parameters.
1728
1729         const char* nbpu_opt_          = nullptr;
1730         const char* pme_opt_           = nullptr;
1731         const char* pme_fft_opt_       = nullptr;
1732         const char *bonded_opt_        = nullptr;
1733         const char *update_opt_        = nullptr;
1734
1735         MdrunOptions                          mdrunOptions_;
1736
1737         DomdecOptions                         domdecOptions_;
1738
1739         ReplicaExchangeParameters             replicaExchangeParameters_;
1740
1741         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1742         int         nstlist_ = 0;
1743
1744         //! Multisim communicator handle.
1745         gmx_multisim_t *multiSimulation_;
1746
1747         //! mdrun communicator
1748         MPI_Comm communicator_ = MPI_COMM_NULL;
1749
1750         //! Print a warning if any force is larger than this (in kJ/mol nm).
1751         real forceWarningThreshold_ = -1;
1752
1753         //! Whether the simulation will start afresh, or restart with/without appending.
1754         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1755
1756         //! The modules that comprise the functionality of mdrun.
1757         std::unique_ptr<MDModules> mdModules_;
1758
1759         //! \brief Parallelism information.
1760         gmx_hw_opt_t hardwareOptions_;
1761
1762         //! filename options for simulation.
1763         ArrayRef<const t_filenm> filenames_;
1764
1765         /*! \brief Handle to output environment.
1766          *
1767          * \todo gmx_output_env_t needs lifetime management.
1768          */
1769         gmx_output_env_t*    outputEnvironment_ = nullptr;
1770
1771         /*! \brief Non-owning handle to MD log file.
1772          *
1773          * \todo Context should own output facilities for client.
1774          * \todo Improve log file handle management.
1775          * \internal
1776          * Code managing the FILE* relies on the ability to set it to
1777          * nullptr to check whether the filehandle is valid.
1778          */
1779         t_fileio* logFileHandle_ = nullptr;
1780
1781         /*!
1782          * \brief Builder for simulation stop signal handler.
1783          */
1784         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1785 };
1786
1787 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules>           mdModules,
1788                                                        compat::not_null<SimulationContext*> context) :
1789     mdModules_(std::move(mdModules))
1790 {
1791     communicator_    = context->communicator_;
1792     multiSimulation_ = context->multiSimulation_.get();
1793 }
1794
1795 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1796
1797 Mdrunner::BuilderImplementation &
1798 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1799                                                       const real             forceWarningThreshold,
1800                                                       const StartingBehavior startingBehavior)
1801 {
1802     mdrunOptions_          = options;
1803     forceWarningThreshold_ = forceWarningThreshold;
1804     startingBehavior_      = startingBehavior;
1805     return *this;
1806 }
1807
1808 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1809 {
1810     domdecOptions_ = options;
1811 }
1812
1813 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1814 {
1815     nstlist_ = nstlist;
1816 }
1817
1818 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1819 {
1820     replicaExchangeParameters_ = params;
1821 }
1822
1823 Mdrunner Mdrunner::BuilderImplementation::build()
1824 {
1825     auto newRunner = Mdrunner(std::move(mdModules_));
1826
1827     newRunner.mdrunOptions          = mdrunOptions_;
1828     newRunner.pforce                = forceWarningThreshold_;
1829     newRunner.startingBehavior      = startingBehavior_;
1830     newRunner.domdecOptions         = domdecOptions_;
1831
1832     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1833     newRunner.hw_opt          = hardwareOptions_;
1834
1835     // No invariant to check. This parameter exists to optionally override other behavior.
1836     newRunner.nstlist_cmdline = nstlist_;
1837
1838     newRunner.replExParams    = replicaExchangeParameters_;
1839
1840     newRunner.filenames = filenames_;
1841
1842     newRunner.communicator = communicator_;
1843
1844     // nullptr is a valid value for the multisim handle
1845     newRunner.ms = multiSimulation_;
1846
1847     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1848     // \todo Update sanity checking when output environment has clearly specified invariants.
1849     // Initialization and default values for oenv are not well specified in the current version.
1850     if (outputEnvironment_)
1851     {
1852         newRunner.oenv  = outputEnvironment_;
1853     }
1854     else
1855     {
1856         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1857     }
1858
1859     newRunner.logFileHandle = logFileHandle_;
1860
1861     if (nbpu_opt_)
1862     {
1863         newRunner.nbpu_opt    = nbpu_opt_;
1864     }
1865     else
1866     {
1867         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1868     }
1869
1870     if (pme_opt_ && pme_fft_opt_)
1871     {
1872         newRunner.pme_opt     = pme_opt_;
1873         newRunner.pme_fft_opt = pme_fft_opt_;
1874     }
1875     else
1876     {
1877         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1878     }
1879
1880     if (bonded_opt_)
1881     {
1882         newRunner.bonded_opt = bonded_opt_;
1883     }
1884     else
1885     {
1886         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1887     }
1888
1889     if (update_opt_)
1890     {
1891         newRunner.update_opt = update_opt_;
1892     }
1893     else
1894     {
1895         GMX_THROW(gmx::APIError("MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
1896     }
1897
1898
1899     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1900
1901     if (stopHandlerBuilder_)
1902     {
1903         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1904     }
1905     else
1906     {
1907         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1908     }
1909
1910     return newRunner;
1911 }
1912
1913 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1914 {
1915     nbpu_opt_ = nbpu_opt;
1916 }
1917
1918 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1919                                              const char* pme_fft_opt)
1920 {
1921     pme_opt_     = pme_opt;
1922     pme_fft_opt_ = pme_fft_opt;
1923 }
1924
1925 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1926 {
1927     bonded_opt_ = bonded_opt;
1928 }
1929
1930 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
1931 {
1932     update_opt_ = update_opt;
1933 }
1934
1935 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1936 {
1937     hardwareOptions_ = hardwareOptions;
1938 }
1939
1940 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1941 {
1942     filenames_ = filenames;
1943 }
1944
1945 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1946 {
1947     outputEnvironment_ = outputEnvironment;
1948 }
1949
1950 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1951 {
1952     logFileHandle_ = logFileHandle;
1953 }
1954
1955 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1956 {
1957     stopHandlerBuilder_ = std::move(builder);
1958 }
1959
1960 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
1961                                  compat::not_null<SimulationContext*> context) :
1962     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context)}
1963 {
1964 }
1965
1966 MdrunnerBuilder::~MdrunnerBuilder() = default;
1967
1968 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1969                                                       real                   forceWarningThreshold,
1970                                                       const StartingBehavior startingBehavior)
1971 {
1972     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1973     return *this;
1974 }
1975
1976 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1977 {
1978     impl_->addDomdec(options);
1979     return *this;
1980 }
1981
1982 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1983 {
1984     impl_->addVerletList(nstlist);
1985     return *this;
1986 }
1987
1988 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1989 {
1990     impl_->addReplicaExchange(params);
1991     return *this;
1992 }
1993
1994 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1995 {
1996     impl_->addNonBonded(nbpu_opt);
1997     return *this;
1998 }
1999
2000 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
2001                                                     const char* pme_fft_opt)
2002 {
2003     // The builder method may become more general in the future, but in this version,
2004     // parameters for PME electrostatics are both required and the only parameters
2005     // available.
2006     if (pme_opt && pme_fft_opt)
2007     {
2008         impl_->addPME(pme_opt, pme_fft_opt);
2009     }
2010     else
2011     {
2012         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2013     }
2014     return *this;
2015 }
2016
2017 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2018 {
2019     impl_->addBondedTaskAssignment(bonded_opt);
2020     return *this;
2021 }
2022
2023 MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2024 {
2025     impl_->addUpdateTaskAssignment(update_opt);
2026     return *this;
2027 }
2028
2029 Mdrunner MdrunnerBuilder::build()
2030 {
2031     return impl_->build();
2032 }
2033
2034 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2035 {
2036     impl_->addHardwareOptions(hardwareOptions);
2037     return *this;
2038 }
2039
2040 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2041 {
2042     impl_->addFilenames(filenames);
2043     return *this;
2044 }
2045
2046 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2047 {
2048     impl_->addOutputEnvironment(outputEnvironment);
2049     return *this;
2050 }
2051
2052 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2053 {
2054     impl_->addLogFile(logFileHandle);
2055     return *this;
2056 }
2057
2058 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2059 {
2060     impl_->addStopHandlerBuilder(std::move(builder));
2061     return *this;
2062 }
2063
2064 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2065
2066 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2067
2068 } // namespace gmx