Use workload data structures for GPU halo exchange triggers
[gromacs.git] / src / gromacs / taskassignment / decidegpuusage.cpp
blobbe1657af8eaa54714c9b90240c3f6d3d3c241d90
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
43 #include "gmxpre.h"
45 #include "decidegpuusage.h"
47 #include "config.h"
49 #include <cstdlib>
50 #include <cstring>
52 #include <algorithm>
53 #include <string>
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/mdlib/gmx_omp_nthreads.h"
61 #include "gromacs/mdlib/update_constrain_gpu.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/inputrec.h"
64 #include "gromacs/mdtypes/md_enums.h"
65 #include "gromacs/mdtypes/mdrunoptions.h"
66 #include "gromacs/pulling/pull.h"
67 #include "gromacs/taskassignment/taskassignment.h"
68 #include "gromacs/topology/mtop_util.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/utility/baseversion.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/stringutil.h"
78 namespace gmx
81 namespace
84 //! Helper variable to localise the text of an often repeated message.
85 const char* g_specifyEverythingFormatString =
86 "When you use mdrun -gputasks, %s must be set to non-default "
87 "values, so that the device IDs can be interpreted correctly."
88 #if GMX_GPU
89 " If you simply want to restrict which GPUs are used, then it is "
90 "better to use mdrun -gpu_id. Otherwise, setting the "
91 # if GMX_GPU_CUDA
92 "CUDA_VISIBLE_DEVICES"
93 # elif GMX_GPU_OPENCL
94 // Technically there is no portable way to do this offered by the
95 // OpenCL standard, but the only current relevant case for GROMACS
96 // is AMD OpenCL, which offers this variable.
97 "GPU_DEVICE_ORDINAL"
98 # elif GMX_GPU_SYCL
99 // SYCL-TODO:
100 "How to restrict visible devices in SYCL?"
101 # else
102 # error "Unreachable branch"
103 # endif
104 " environment variable in your bash profile or job "
105 "script may be more convenient."
106 #endif
109 } // namespace
111 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
112 const std::vector<int>& gpuIdsToUse,
113 const std::vector<int>& userGpuTaskAssignment,
114 const EmulateGpuNonbonded emulateGpuNonbonded,
115 const bool buildSupportsNonbondedOnGpu,
116 const bool nonbondedOnGpuIsUseful,
117 const int numRanksPerSimulation)
119 // First, exclude all cases where we can't run NB on GPUs.
120 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
121 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
123 // If the user required NB on GPUs, we issue an error later.
124 return false;
127 // We now know that NB on GPUs makes sense, if we have any.
129 if (!userGpuTaskAssignment.empty())
131 // Specifying -gputasks requires specifying everything.
132 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
134 GMX_THROW(InconsistentInputError(
135 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
137 return true;
140 if (nonbondedTarget == TaskTarget::Gpu)
142 return true;
145 // Because this is thread-MPI, we already know about the GPUs that
146 // all potential ranks can use, and can use that in a global
147 // decision that will later be consistent.
148 auto haveGpus = !gpuIdsToUse.empty();
150 // If we get here, then the user permitted or required GPUs.
151 return haveGpus;
154 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
155 const TaskTarget pmeTarget,
156 const std::vector<int>& gpuIdsToUse,
157 const std::vector<int>& userGpuTaskAssignment,
158 const gmx_hw_info_t& hardwareInfo,
159 const t_inputrec& inputrec,
160 const int numRanksPerSimulation,
161 const int numPmeRanksPerSimulation)
163 // First, exclude all cases where we can't run PME on GPUs.
164 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
165 || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
167 // PME can't run on a GPU. If the user required that, we issue
168 // an error later.
169 return false;
172 // We now know that PME on GPUs might make sense, if we have any.
174 if (!userGpuTaskAssignment.empty())
176 // Follow the user's choice of GPU task assignment, if we
177 // can. Checking that their IDs are for compatible GPUs comes
178 // later.
180 // Specifying -gputasks requires specifying everything.
181 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
183 GMX_THROW(InconsistentInputError(
184 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
187 // PME on GPUs is only supported in a single case
188 if (pmeTarget == TaskTarget::Gpu)
190 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
191 || (numPmeRanksPerSimulation > 1))
193 GMX_THROW(InconsistentInputError(
194 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
195 "file and use a single PME rank."));
197 return true;
200 // pmeTarget == TaskTarget::Auto
201 return numRanksPerSimulation == 1;
204 // Because this is thread-MPI, we already know about the GPUs that
205 // all potential ranks can use, and can use that in a global
206 // decision that will later be consistent.
208 if (pmeTarget == TaskTarget::Gpu)
210 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
211 || (numPmeRanksPerSimulation > 1))
213 GMX_THROW(NotImplementedError(
214 "PME tasks were required to run on GPUs, but that is not implemented with "
215 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
216 "or permit PME tasks to be assigned to the CPU."));
218 return true;
221 if (numRanksPerSimulation == 1)
223 // PME can run well on a GPU shared with NB, and we permit
224 // mdrun to default to try that.
225 return !gpuIdsToUse.empty();
228 if (numRanksPerSimulation < 1)
230 // Full automated mode for thread-MPI (the default). PME can
231 // run well on a GPU shared with NB, and we permit mdrun to
232 // default to it if there is only one GPU available.
233 return (gpuIdsToUse.size() == 1);
236 // Not enough support for PME on GPUs for anything else
237 return false;
240 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
241 const std::vector<int>& userGpuTaskAssignment,
242 const EmulateGpuNonbonded emulateGpuNonbonded,
243 const bool buildSupportsNonbondedOnGpu,
244 const bool nonbondedOnGpuIsUseful,
245 const bool gpusWereDetected)
247 if (nonbondedTarget == TaskTarget::Cpu)
249 if (!userGpuTaskAssignment.empty())
251 GMX_THROW(InconsistentInputError(
252 "A GPU task assignment was specified, but nonbonded interactions were "
253 "assigned to the CPU. Make no more than one of these choices."));
256 return false;
259 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
261 GMX_THROW(InconsistentInputError(
262 "Nonbonded interactions on the GPU were requested with -nb gpu, "
263 "but the GROMACS binary has been built without GPU support. "
264 "Either run without selecting GPU options, or recompile GROMACS "
265 "with GPU support enabled"));
268 // TODO refactor all these TaskTarget::Gpu checks into one place?
269 // e.g. use a subfunction that handles only the cases where
270 // TaskTargets are not Cpu?
271 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
273 if (nonbondedTarget == TaskTarget::Gpu)
275 GMX_THROW(InconsistentInputError(
276 "Nonbonded interactions on the GPU were required, which is inconsistent "
277 "with choosing emulation. Make no more than one of these choices."));
279 if (!userGpuTaskAssignment.empty())
281 GMX_THROW(
282 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
283 "no more than one of these choices."));
286 return false;
289 if (!nonbondedOnGpuIsUseful)
291 if (nonbondedTarget == TaskTarget::Gpu)
293 GMX_THROW(InconsistentInputError(
294 "Nonbonded interactions on the GPU were required, but not supported for these "
295 "simulation settings. Change your settings, or do not require using GPUs."));
298 return false;
301 if (!userGpuTaskAssignment.empty())
303 // Specifying -gputasks requires specifying everything.
304 if (nonbondedTarget == TaskTarget::Auto)
306 GMX_THROW(InconsistentInputError(
307 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
310 return true;
313 if (nonbondedTarget == TaskTarget::Gpu)
315 // We still don't know whether it is an error if no GPUs are found
316 // because we don't know the duty of this rank, yet. For example,
317 // a node with only PME ranks and -pme cpu is OK if there are not
318 // GPUs.
319 return true;
322 // If we get here, then the user permitted GPUs, which we should
323 // use for nonbonded interactions.
324 return gpusWereDetected;
327 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
328 const TaskTarget pmeTarget,
329 const std::vector<int>& userGpuTaskAssignment,
330 const gmx_hw_info_t& hardwareInfo,
331 const t_inputrec& inputrec,
332 const int numRanksPerSimulation,
333 const int numPmeRanksPerSimulation,
334 const bool gpusWereDetected)
336 if (pmeTarget == TaskTarget::Cpu)
338 return false;
341 if (!useGpuForNonbonded)
343 if (pmeTarget == TaskTarget::Gpu)
345 GMX_THROW(NotImplementedError(
346 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
348 return false;
351 std::string message;
352 if (!pme_gpu_supports_build(&message))
354 if (pmeTarget == TaskTarget::Gpu)
356 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
358 return false;
360 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
362 if (pmeTarget == TaskTarget::Gpu)
364 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
366 return false;
368 if (!pme_gpu_supports_input(inputrec, &message))
370 if (pmeTarget == TaskTarget::Gpu)
372 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
374 return false;
377 if (pmeTarget == TaskTarget::Cpu)
379 if (!userGpuTaskAssignment.empty())
381 GMX_THROW(InconsistentInputError(
382 "A GPU task assignment was specified, but PME interactions were "
383 "assigned to the CPU. Make no more than one of these choices."));
386 return false;
389 if (!userGpuTaskAssignment.empty())
391 // Specifying -gputasks requires specifying everything.
392 if (pmeTarget == TaskTarget::Auto)
394 GMX_THROW(InconsistentInputError(formatString(
395 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
398 return true;
401 // We still don't know whether it is an error if no GPUs are found
402 // because we don't know the duty of this rank, yet. For example,
403 // a node with only PME ranks and -pme cpu is OK if there are not
404 // GPUs.
406 if (pmeTarget == TaskTarget::Gpu)
408 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
409 || (numPmeRanksPerSimulation > 1))
411 GMX_THROW(NotImplementedError(
412 "PME tasks were required to run on GPUs, but that is not implemented with "
413 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
414 "or permit PME tasks to be assigned to the CPU."));
416 return true;
419 // If we get here, then the user permitted GPUs.
420 if (numRanksPerSimulation == 1)
422 // PME can run well on a single GPU shared with NB when there
423 // is one rank, so we permit mdrun to try that if we have
424 // detected GPUs.
425 return gpusWereDetected;
428 // Not enough support for PME on GPUs for anything else
429 return false;
433 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
435 if (!EEL_PME(inputrec.coulombtype))
437 return PmeRunMode::None;
440 if (useGpuForPme)
442 if (pmeFftTarget == TaskTarget::Cpu)
444 return PmeRunMode::Mixed;
446 else
448 return PmeRunMode::GPU;
451 else
453 if (pmeFftTarget == TaskTarget::Gpu)
455 gmx_fatal(FARGS,
456 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
457 "on CPU you should not be using -pmefft.");
459 return PmeRunMode::CPU;
463 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
464 const bool useGpuForPme,
465 const TaskTarget bondedTarget,
466 const bool canUseGpuForBonded,
467 const bool usingLJPme,
468 const bool usingElecPmeOrEwald,
469 const int numPmeRanksPerSimulation,
470 const bool gpusWereDetected)
472 if (bondedTarget == TaskTarget::Cpu)
474 return false;
477 if (!canUseGpuForBonded)
479 if (bondedTarget == TaskTarget::Gpu)
481 GMX_THROW(InconsistentInputError(
482 "Bonded interactions on the GPU were required, but not supported for these "
483 "simulation settings. Change your settings, or do not require using GPUs."));
486 return false;
489 if (!useGpuForNonbonded)
491 if (bondedTarget == TaskTarget::Gpu)
493 GMX_THROW(InconsistentInputError(
494 "Bonded interactions on the GPU were required, but this requires that "
495 "short-ranged non-bonded interactions are also run on the GPU. Change "
496 "your settings, or do not require using GPUs."));
499 return false;
502 // TODO If the bonded kernels do not get fused, then performance
503 // overheads might suggest alternative choices here.
505 if (bondedTarget == TaskTarget::Gpu)
507 // We still don't know whether it is an error if no GPUs are
508 // found.
509 return true;
512 // If we get here, then the user permitted GPUs, which we should
513 // use for bonded interactions if any were detected and the CPU
514 // is busy, for which we currently only check PME or Ewald.
515 // (It would be better to dynamically assign bondeds based on timings)
516 // Note that here we assume that the auto setting of PME ranks will not
517 // choose seperate PME ranks when nonBonded are assigned to the GPU.
518 bool usingOurCpuForPmeOrEwald =
519 (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
521 return gpusWereDetected && usingOurCpuForPmeOrEwald;
524 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
525 const bool useUpdateGroups,
526 const PmeRunMode pmeRunMode,
527 const bool havePmeOnlyRank,
528 const bool useGpuForNonbonded,
529 const TaskTarget updateTarget,
530 const bool gpusWereDetected,
531 const t_inputrec& inputrec,
532 const gmx_mtop_t& mtop,
533 const bool useEssentialDynamics,
534 const bool doOrientationRestraints,
535 const bool useReplicaExchange,
536 const bool doRerun,
537 const DevelopmentFeatureFlags& devFlags,
538 const gmx::MDLogger& mdlog)
541 // '-update cpu' overrides the environment variable, '-update auto' does not
542 if (updateTarget == TaskTarget::Cpu
543 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
545 return false;
548 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
549 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
551 std::string errorMessage;
553 if (isDomainDecomposition)
555 if (!devFlags.enableGpuHaloExchange)
557 errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
559 else
561 if (hasAnyConstraints && !useUpdateGroups)
563 errorMessage +=
564 "Domain decomposition is only supported with constraints when update "
565 "groups "
566 "are used. This means constraining all bonds is not supported, except for "
567 "small molecules, and box sizes close to half the pair-list cutoff are not "
568 "supported.\n ";
571 if (pmeUsesCpu)
573 errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
578 if (havePmeOnlyRank)
580 if (pmeUsesCpu)
582 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
585 if (!devFlags.enableGpuPmePPComm)
587 errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
591 if (inputrec.useMts)
593 errorMessage += "Multiple time stepping is not supported.\n";
596 if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
598 errorMessage += "SHAKE constraints are not supported.\n";
600 // Using the GPU-version of update if:
601 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
602 // 2. Non-bonded interactions are on the GPU.
603 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
605 errorMessage +=
606 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
609 if (!gpusWereDetected)
611 errorMessage += "Compatible GPUs must have been found.\n";
613 if (!GMX_GPU_CUDA)
615 errorMessage += "Only a CUDA build is supported.\n";
617 if (inputrec.eI != eiMD)
619 errorMessage += "Only the md integrator is supported.\n";
621 if (inputrec.etc == etcNOSEHOOVER)
623 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
625 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN
626 || inputrec.epc == epcBERENDSEN || inputrec.epc == epcCRESCALE))
628 errorMessage +=
629 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
630 "supported.\n";
632 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
634 // The graph is needed, but not supported
635 errorMessage += "Ewald surface correction is not supported.\n";
637 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
639 errorMessage += "Virtual sites are not supported.\n";
641 if (useEssentialDynamics)
643 errorMessage += "Essential dynamics is not supported.\n";
645 if (inputrec.bPull && pull_have_constraint(inputrec.pull))
647 errorMessage += "Constraints pulling is not supported.\n";
649 if (doOrientationRestraints)
651 // The graph is needed, but not supported
652 errorMessage += "Orientation restraints are not supported.\n";
654 if (inputrec.efep != efepNO
655 && (haveFreeEnergyType(inputrec, efptBONDED) || haveFreeEnergyType(inputrec, efptMASS)))
657 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
659 const auto particleTypes = gmx_mtop_particletype_count(mtop);
660 if (particleTypes[eptShell] > 0)
662 errorMessage += "Shells are not supported.\n";
664 if (useReplicaExchange)
666 errorMessage += "Replica exchange simulations are not supported.\n";
668 if (inputrec.eSwapCoords != eswapNO)
670 errorMessage += "Swapping the coordinates is not supported.\n";
672 if (doRerun)
674 errorMessage += "Re-run is not supported.\n";
677 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
678 // does not support it, the actual CUDA LINCS code does support it
679 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
681 errorMessage += "Non-connecting constraints are not supported\n";
683 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
685 errorMessage +=
686 "The number of coupled constraints is higher than supported in the GPU LINCS "
687 "code.\n";
690 if (!errorMessage.empty())
692 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
694 GMX_LOG(mdlog.warning)
695 .asParagraph()
696 .appendText(
697 "Update task on the GPU was required, by the "
698 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
699 "condition(s) were not satisfied:");
700 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
701 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
703 else if (updateTarget == TaskTarget::Gpu)
705 std::string prefix = gmx::formatString(
706 "Update task on the GPU was required,\n"
707 "but the following condition(s) were not satisfied:\n");
708 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
710 return false;
713 return (updateTarget == TaskTarget::Gpu
714 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
717 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
718 bool havePPDomainDecomposition,
719 bool useGpuForNonbonded,
720 bool useModularSimulator,
721 bool doRerun,
722 bool haveEnergyMinimization)
724 return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
725 && !useModularSimulator && !doRerun && !haveEnergyMinimization;
728 } // namespace gmx