2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/mdlib/gmx_omp_nthreads.h"
61 #include "gromacs/mdlib/update_constrain_gpu.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/inputrec.h"
64 #include "gromacs/mdtypes/md_enums.h"
65 #include "gromacs/mdtypes/mdrunoptions.h"
66 #include "gromacs/pulling/pull.h"
67 #include "gromacs/taskassignment/taskassignment.h"
68 #include "gromacs/topology/mtop_util.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/utility/baseversion.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/stringutil.h"
84 //! Helper variable to localise the text of an often repeated message.
85 const char* g_specifyEverythingFormatString
=
86 "When you use mdrun -gputasks, %s must be set to non-default "
87 "values, so that the device IDs can be interpreted correctly."
89 " If you simply want to restrict which GPUs are used, then it is "
90 "better to use mdrun -gpu_id. Otherwise, setting the "
92 "CUDA_VISIBLE_DEVICES"
94 // Technically there is no portable way to do this offered by the
95 // OpenCL standard, but the only current relevant case for GROMACS
96 // is AMD OpenCL, which offers this variable.
100 "How to restrict visible devices in SYCL?"
102 # error "Unreachable branch"
104 " environment variable in your bash profile or job "
105 "script may be more convenient."
111 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget
,
112 const int numDevicesToUse
,
113 const std::vector
<int>& userGpuTaskAssignment
,
114 const EmulateGpuNonbonded emulateGpuNonbonded
,
115 const bool buildSupportsNonbondedOnGpu
,
116 const bool nonbondedOnGpuIsUseful
,
117 const int numRanksPerSimulation
)
119 // First, exclude all cases where we can't run NB on GPUs.
120 if (nonbondedTarget
== TaskTarget::Cpu
|| emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
121 || !nonbondedOnGpuIsUseful
|| !buildSupportsNonbondedOnGpu
)
123 // If the user required NB on GPUs, we issue an error later.
127 // We now know that NB on GPUs makes sense, if we have any.
129 if (!userGpuTaskAssignment
.empty())
131 // Specifying -gputasks requires specifying everything.
132 if (nonbondedTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
134 GMX_THROW(InconsistentInputError(
135 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
140 if (nonbondedTarget
== TaskTarget::Gpu
)
145 // Because this is thread-MPI, we already know about the GPUs that
146 // all potential ranks can use, and can use that in a global
147 // decision that will later be consistent.
148 // If we get here, then the user permitted or required GPUs.
149 return (numDevicesToUse
> 0);
152 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded
,
153 const TaskTarget pmeTarget
,
154 const int numDevicesToUse
,
155 const std::vector
<int>& userGpuTaskAssignment
,
156 const gmx_hw_info_t
& hardwareInfo
,
157 const t_inputrec
& inputrec
,
158 const int numRanksPerSimulation
,
159 const int numPmeRanksPerSimulation
)
161 // First, exclude all cases where we can't run PME on GPUs.
162 if ((pmeTarget
== TaskTarget::Cpu
) || !useGpuForNonbonded
|| !pme_gpu_supports_build(nullptr)
163 || !pme_gpu_supports_hardware(hardwareInfo
, nullptr) || !pme_gpu_supports_input(inputrec
, nullptr))
165 // PME can't run on a GPU. If the user required that, we issue
170 // We now know that PME on GPUs might make sense, if we have any.
172 if (!userGpuTaskAssignment
.empty())
174 // Follow the user's choice of GPU task assignment, if we
175 // can. Checking that their IDs are for compatible GPUs comes
178 // Specifying -gputasks requires specifying everything.
179 if (pmeTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
181 GMX_THROW(InconsistentInputError(
182 formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi")));
185 // PME on GPUs is only supported in a single case
186 if (pmeTarget
== TaskTarget::Gpu
)
188 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
189 || (numPmeRanksPerSimulation
> 1))
191 GMX_THROW(InconsistentInputError(
192 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
193 "file and use a single PME rank."));
198 // pmeTarget == TaskTarget::Auto
199 return numRanksPerSimulation
== 1;
202 // Because this is thread-MPI, we already know about the GPUs that
203 // all potential ranks can use, and can use that in a global
204 // decision that will later be consistent.
206 if (pmeTarget
== TaskTarget::Gpu
)
208 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
209 || (numPmeRanksPerSimulation
> 1))
211 GMX_THROW(NotImplementedError(
212 "PME tasks were required to run on GPUs, but that is not implemented with "
213 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
214 "or permit PME tasks to be assigned to the CPU."));
219 if (numRanksPerSimulation
== 1)
221 // PME can run well on a GPU shared with NB, and we permit
222 // mdrun to default to try that.
223 return numDevicesToUse
> 0;
226 if (numRanksPerSimulation
< 1)
228 // Full automated mode for thread-MPI (the default). PME can
229 // run well on a GPU shared with NB, and we permit mdrun to
230 // default to it if there is only one GPU available.
231 return (numDevicesToUse
== 1);
234 // Not enough support for PME on GPUs for anything else
238 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget
,
239 const std::vector
<int>& userGpuTaskAssignment
,
240 const EmulateGpuNonbonded emulateGpuNonbonded
,
241 const bool buildSupportsNonbondedOnGpu
,
242 const bool nonbondedOnGpuIsUseful
,
243 const bool gpusWereDetected
)
245 if (nonbondedTarget
== TaskTarget::Cpu
)
247 if (!userGpuTaskAssignment
.empty())
249 GMX_THROW(InconsistentInputError(
250 "A GPU task assignment was specified, but nonbonded interactions were "
251 "assigned to the CPU. Make no more than one of these choices."));
257 if (!buildSupportsNonbondedOnGpu
&& nonbondedTarget
== TaskTarget::Gpu
)
259 GMX_THROW(InconsistentInputError(
260 "Nonbonded interactions on the GPU were requested with -nb gpu, "
261 "but the GROMACS binary has been built without GPU support. "
262 "Either run without selecting GPU options, or recompile GROMACS "
263 "with GPU support enabled"));
266 // TODO refactor all these TaskTarget::Gpu checks into one place?
267 // e.g. use a subfunction that handles only the cases where
268 // TaskTargets are not Cpu?
269 if (emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
)
271 if (nonbondedTarget
== TaskTarget::Gpu
)
273 GMX_THROW(InconsistentInputError(
274 "Nonbonded interactions on the GPU were required, which is inconsistent "
275 "with choosing emulation. Make no more than one of these choices."));
277 if (!userGpuTaskAssignment
.empty())
280 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
281 "no more than one of these choices."));
287 if (!nonbondedOnGpuIsUseful
)
289 if (nonbondedTarget
== TaskTarget::Gpu
)
291 GMX_THROW(InconsistentInputError(
292 "Nonbonded interactions on the GPU were required, but not supported for these "
293 "simulation settings. Change your settings, or do not require using GPUs."));
299 if (!userGpuTaskAssignment
.empty())
301 // Specifying -gputasks requires specifying everything.
302 if (nonbondedTarget
== TaskTarget::Auto
)
304 GMX_THROW(InconsistentInputError(
305 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
311 if (nonbondedTarget
== TaskTarget::Gpu
)
313 // We still don't know whether it is an error if no GPUs are found
314 // because we don't know the duty of this rank, yet. For example,
315 // a node with only PME ranks and -pme cpu is OK if there are not
320 // If we get here, then the user permitted GPUs, which we should
321 // use for nonbonded interactions.
322 return buildSupportsNonbondedOnGpu
&& gpusWereDetected
;
325 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded
,
326 const TaskTarget pmeTarget
,
327 const std::vector
<int>& userGpuTaskAssignment
,
328 const gmx_hw_info_t
& hardwareInfo
,
329 const t_inputrec
& inputrec
,
330 const int numRanksPerSimulation
,
331 const int numPmeRanksPerSimulation
,
332 const bool gpusWereDetected
)
334 if (pmeTarget
== TaskTarget::Cpu
)
339 if (!useGpuForNonbonded
)
341 if (pmeTarget
== TaskTarget::Gpu
)
343 GMX_THROW(NotImplementedError(
344 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
350 if (!pme_gpu_supports_build(&message
))
352 if (pmeTarget
== TaskTarget::Gpu
)
354 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
358 if (!pme_gpu_supports_hardware(hardwareInfo
, &message
))
360 if (pmeTarget
== TaskTarget::Gpu
)
362 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
366 if (!pme_gpu_supports_input(inputrec
, &message
))
368 if (pmeTarget
== TaskTarget::Gpu
)
370 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
375 if (pmeTarget
== TaskTarget::Cpu
)
377 if (!userGpuTaskAssignment
.empty())
379 GMX_THROW(InconsistentInputError(
380 "A GPU task assignment was specified, but PME interactions were "
381 "assigned to the CPU. Make no more than one of these choices."));
387 if (!userGpuTaskAssignment
.empty())
389 // Specifying -gputasks requires specifying everything.
390 if (pmeTarget
== TaskTarget::Auto
)
392 GMX_THROW(InconsistentInputError(formatString(
393 g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
399 // We still don't know whether it is an error if no GPUs are found
400 // because we don't know the duty of this rank, yet. For example,
401 // a node with only PME ranks and -pme cpu is OK if there are not
404 if (pmeTarget
== TaskTarget::Gpu
)
406 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
407 || (numPmeRanksPerSimulation
> 1))
409 GMX_THROW(NotImplementedError(
410 "PME tasks were required to run on GPUs, but that is not implemented with "
411 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
412 "or permit PME tasks to be assigned to the CPU."));
417 // If we get here, then the user permitted GPUs.
418 if (numRanksPerSimulation
== 1)
420 // PME can run well on a single GPU shared with NB when there
421 // is one rank, so we permit mdrun to try that if we have
423 return gpusWereDetected
;
426 // Not enough support for PME on GPUs for anything else
431 PmeRunMode
determinePmeRunMode(const bool useGpuForPme
, const TaskTarget
& pmeFftTarget
, const t_inputrec
& inputrec
)
433 if (!EEL_PME(inputrec
.coulombtype
))
435 return PmeRunMode::None
;
440 if (pmeFftTarget
== TaskTarget::Cpu
)
442 return PmeRunMode::Mixed
;
446 return PmeRunMode::GPU
;
451 if (pmeFftTarget
== TaskTarget::Gpu
)
454 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
455 "on CPU you should not be using -pmefft.");
457 return PmeRunMode::CPU
;
461 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded
,
462 const bool useGpuForPme
,
463 const TaskTarget bondedTarget
,
464 const bool canUseGpuForBonded
,
465 const bool usingLJPme
,
466 const bool usingElecPmeOrEwald
,
467 const int numPmeRanksPerSimulation
,
468 const bool gpusWereDetected
)
470 if (bondedTarget
== TaskTarget::Cpu
)
475 if (!canUseGpuForBonded
)
477 if (bondedTarget
== TaskTarget::Gpu
)
479 GMX_THROW(InconsistentInputError(
480 "Bonded interactions on the GPU were required, but not supported for these "
481 "simulation settings. Change your settings, or do not require using GPUs."));
487 if (!useGpuForNonbonded
)
489 if (bondedTarget
== TaskTarget::Gpu
)
491 GMX_THROW(InconsistentInputError(
492 "Bonded interactions on the GPU were required, but this requires that "
493 "short-ranged non-bonded interactions are also run on the GPU. Change "
494 "your settings, or do not require using GPUs."));
500 // TODO If the bonded kernels do not get fused, then performance
501 // overheads might suggest alternative choices here.
503 if (bondedTarget
== TaskTarget::Gpu
)
505 // We still don't know whether it is an error if no GPUs are
510 // If we get here, then the user permitted GPUs, which we should
511 // use for bonded interactions if any were detected and the CPU
512 // is busy, for which we currently only check PME or Ewald.
513 // (It would be better to dynamically assign bondeds based on timings)
514 // Note that here we assume that the auto setting of PME ranks will not
515 // choose seperate PME ranks when nonBonded are assigned to the GPU.
516 bool usingOurCpuForPmeOrEwald
=
517 (usingLJPme
|| (usingElecPmeOrEwald
&& !useGpuForPme
&& numPmeRanksPerSimulation
<= 0));
519 return gpusWereDetected
&& usingOurCpuForPmeOrEwald
;
522 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition
,
523 const bool useUpdateGroups
,
524 const PmeRunMode pmeRunMode
,
525 const bool havePmeOnlyRank
,
526 const bool useGpuForNonbonded
,
527 const TaskTarget updateTarget
,
528 const bool gpusWereDetected
,
529 const t_inputrec
& inputrec
,
530 const gmx_mtop_t
& mtop
,
531 const bool useEssentialDynamics
,
532 const bool doOrientationRestraints
,
533 const bool useReplicaExchange
,
535 const DevelopmentFeatureFlags
& devFlags
,
536 const gmx::MDLogger
& mdlog
)
539 // '-update cpu' overrides the environment variable, '-update auto' does not
540 if (updateTarget
== TaskTarget::Cpu
541 || (updateTarget
== TaskTarget::Auto
&& !devFlags
.forceGpuUpdateDefault
))
546 const bool hasAnyConstraints
= gmx_mtop_interaction_count(mtop
, IF_CONSTRAINT
) > 0;
547 const bool pmeUsesCpu
= (pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::Mixed
);
549 std::string errorMessage
;
551 if (isDomainDecomposition
)
553 if (!devFlags
.enableGpuHaloExchange
)
555 errorMessage
+= "Domain decomposition without GPU halo exchange is not supported.\n ";
559 if (hasAnyConstraints
&& !useUpdateGroups
)
562 "Domain decomposition is only supported with constraints when update "
564 "are used. This means constraining all bonds is not supported, except for "
565 "small molecules, and box sizes close to half the pair-list cutoff are not "
571 errorMessage
+= "With domain decomposition, PME must run fully on the GPU.\n";
580 errorMessage
+= "With separate PME rank(s), PME must run fully on the GPU.\n";
583 if (!devFlags
.enableGpuPmePPComm
)
585 errorMessage
+= "With separate PME rank(s), PME must use direct communication.\n";
591 errorMessage
+= "Multiple time stepping is not supported.\n";
594 if (inputrec
.eConstrAlg
== econtSHAKE
&& hasAnyConstraints
&& gmx_mtop_ftype_count(mtop
, F_CONSTR
) > 0)
596 errorMessage
+= "SHAKE constraints are not supported.\n";
598 // Using the GPU-version of update if:
599 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
600 // 2. Non-bonded interactions are on the GPU.
601 if ((pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::None
) && !useGpuForNonbonded
)
604 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
607 if (!gpusWereDetected
)
609 errorMessage
+= "Compatible GPUs must have been found.\n";
613 errorMessage
+= "Only a CUDA build is supported.\n";
615 if (inputrec
.eI
!= eiMD
)
617 errorMessage
+= "Only the md integrator is supported.\n";
619 if (inputrec
.etc
== etcNOSEHOOVER
)
621 errorMessage
+= "Nose-Hoover temperature coupling is not supported.\n";
623 if (!(inputrec
.epc
== epcNO
|| inputrec
.epc
== epcPARRINELLORAHMAN
624 || inputrec
.epc
== epcBERENDSEN
|| inputrec
.epc
== epcCRESCALE
))
627 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
630 if (EEL_PME_EWALD(inputrec
.coulombtype
) && inputrec
.epsilon_surface
!= 0)
632 // The graph is needed, but not supported
633 errorMessage
+= "Ewald surface correction is not supported.\n";
635 if (gmx_mtop_interaction_count(mtop
, IF_VSITE
) > 0)
637 errorMessage
+= "Virtual sites are not supported.\n";
639 if (useEssentialDynamics
)
641 errorMessage
+= "Essential dynamics is not supported.\n";
643 if (inputrec
.bPull
&& pull_have_constraint(*inputrec
.pull
))
645 errorMessage
+= "Constraints pulling is not supported.\n";
647 if (doOrientationRestraints
)
649 // The graph is needed, but not supported
650 errorMessage
+= "Orientation restraints are not supported.\n";
652 if (inputrec
.efep
!= efepNO
653 && (haveFreeEnergyType(inputrec
, efptBONDED
) || haveFreeEnergyType(inputrec
, efptMASS
)))
655 errorMessage
+= "Free energy perturbation for mass and constraints are not supported.\n";
657 const auto particleTypes
= gmx_mtop_particletype_count(mtop
);
658 if (particleTypes
[eptShell
] > 0)
660 errorMessage
+= "Shells are not supported.\n";
662 if (useReplicaExchange
)
664 errorMessage
+= "Replica exchange simulations are not supported.\n";
666 if (inputrec
.eSwapCoords
!= eswapNO
)
668 errorMessage
+= "Swapping the coordinates is not supported.\n";
672 errorMessage
+= "Re-run is not supported.\n";
675 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
676 // does not support it, the actual CUDA LINCS code does support it
677 if (gmx_mtop_ftype_count(mtop
, F_CONSTRNC
) > 0)
679 errorMessage
+= "Non-connecting constraints are not supported\n";
681 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop
))
684 "The number of coupled constraints is higher than supported in the GPU LINCS "
688 if (!errorMessage
.empty())
690 if (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
)
692 GMX_LOG(mdlog
.warning
)
695 "Update task on the GPU was required, by the "
696 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
697 "condition(s) were not satisfied:");
698 GMX_LOG(mdlog
.warning
).asParagraph().appendText(errorMessage
.c_str());
699 GMX_LOG(mdlog
.warning
).asParagraph().appendText("Will use CPU version of update.");
701 else if (updateTarget
== TaskTarget::Gpu
)
703 std::string prefix
= gmx::formatString(
704 "Update task on the GPU was required,\n"
705 "but the following condition(s) were not satisfied:\n");
706 GMX_THROW(InconsistentInputError((prefix
+ errorMessage
).c_str()));
711 return (updateTarget
== TaskTarget::Gpu
712 || (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
));
715 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags
& devFlags
,
716 bool havePPDomainDecomposition
,
717 bool useGpuForNonbonded
,
718 bool useModularSimulator
,
720 bool haveEnergyMinimization
)
722 return havePPDomainDecomposition
&& devFlags
.enableGpuHaloExchange
&& useGpuForNonbonded
723 && !useModularSimulator
&& !doRerun
&& !haveEnergyMinimization
;