2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/listed_forces/gpubonded.h"
61 #include "gromacs/mdlib/gmx_omp_nthreads.h"
62 #include "gromacs/mdlib/update_constrain_gpu.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/mdrunoptions.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/taskassignment/taskassignment.h"
69 #include "gromacs/topology/mtop_util.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/baseversion.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/gmxassert.h"
75 #include "gromacs/utility/logger.h"
76 #include "gromacs/utility/stringutil.h"
85 //! Helper variable to localise the text of an often repeated message.
86 const char* g_specifyEverythingFormatString
=
87 "When you use mdrun -gputasks, %s must be set to non-default "
88 "values, so that the device IDs can be interpreted correctly."
90 " If you simply want to restrict which GPUs are used, then it is "
91 "better to use mdrun -gpu_id. Otherwise, setting the "
93 "CUDA_VISIBLE_DEVICES"
95 // Technically there is no portable way to do this offered by the
96 // OpenCL standard, but the only current relevant case for GROMACS
97 // is AMD OpenCL, which offers this variable.
100 // As with OpenCL, there are no portable way to do it.
101 // Intel reference: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
102 // While SYCL_DEVICE_FILTER is a better option, as of 2021.1-beta10 it is not yet supported.
103 "SYCL_DEVICE_ALLOWLIST"
105 # error "Unreachable branch"
107 " environment variable in your bash profile or job "
108 "script may be more convenient."
114 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget
,
115 const int numDevicesToUse
,
116 const std::vector
<int>& userGpuTaskAssignment
,
117 const EmulateGpuNonbonded emulateGpuNonbonded
,
118 const bool buildSupportsNonbondedOnGpu
,
119 const bool nonbondedOnGpuIsUseful
,
120 const int numRanksPerSimulation
)
122 // First, exclude all cases where we can't run NB on GPUs.
123 if (nonbondedTarget
== TaskTarget::Cpu
|| emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
124 || !nonbondedOnGpuIsUseful
|| !buildSupportsNonbondedOnGpu
)
126 // If the user required NB on GPUs, we issue an error later.
130 // We now know that NB on GPUs makes sense, if we have any.
132 if (!userGpuTaskAssignment
.empty())
134 // Specifying -gputasks requires specifying everything.
135 if (nonbondedTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
137 GMX_THROW(InconsistentInputError(
138 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
143 if (nonbondedTarget
== TaskTarget::Gpu
)
148 // Because this is thread-MPI, we already know about the GPUs that
149 // all potential ranks can use, and can use that in a global
150 // decision that will later be consistent.
151 // If we get here, then the user permitted or required GPUs.
152 return (numDevicesToUse
> 0);
155 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded
,
156 const TaskTarget pmeTarget
,
157 const int numDevicesToUse
,
158 const std::vector
<int>& userGpuTaskAssignment
,
159 const gmx_hw_info_t
& hardwareInfo
,
160 const t_inputrec
& inputrec
,
161 const int numRanksPerSimulation
,
162 const int numPmeRanksPerSimulation
)
164 // First, exclude all cases where we can't run PME on GPUs.
165 if ((pmeTarget
== TaskTarget::Cpu
) || !useGpuForNonbonded
|| !pme_gpu_supports_build(nullptr)
166 || !pme_gpu_supports_hardware(hardwareInfo
, nullptr) || !pme_gpu_supports_input(inputrec
, nullptr))
168 // PME can't run on a GPU. If the user required that, we issue
173 // We now know that PME on GPUs might make sense, if we have any.
175 if (!userGpuTaskAssignment
.empty())
177 // Follow the user's choice of GPU task assignment, if we
178 // can. Checking that their IDs are for compatible GPUs comes
181 // Specifying -gputasks requires specifying everything.
182 if (pmeTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
184 GMX_THROW(InconsistentInputError(
185 formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi")));
188 // PME on GPUs is only supported in a single case
189 if (pmeTarget
== TaskTarget::Gpu
)
191 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
192 || (numPmeRanksPerSimulation
> 1))
194 GMX_THROW(InconsistentInputError(
195 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
196 "file and use a single PME rank."));
201 // pmeTarget == TaskTarget::Auto
202 return numRanksPerSimulation
== 1;
205 // Because this is thread-MPI, we already know about the GPUs that
206 // all potential ranks can use, and can use that in a global
207 // decision that will later be consistent.
209 if (pmeTarget
== TaskTarget::Gpu
)
211 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
212 || (numPmeRanksPerSimulation
> 1))
214 GMX_THROW(NotImplementedError(
215 "PME tasks were required to run on GPUs, but that is not implemented with "
216 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
217 "or permit PME tasks to be assigned to the CPU."));
222 if (numRanksPerSimulation
== 1)
224 // PME can run well on a GPU shared with NB, and we permit
225 // mdrun to default to try that.
226 return numDevicesToUse
> 0;
229 if (numRanksPerSimulation
< 1)
231 // Full automated mode for thread-MPI (the default). PME can
232 // run well on a GPU shared with NB, and we permit mdrun to
233 // default to it if there is only one GPU available.
234 return (numDevicesToUse
== 1);
237 // Not enough support for PME on GPUs for anything else
241 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget
,
242 const std::vector
<int>& userGpuTaskAssignment
,
243 const EmulateGpuNonbonded emulateGpuNonbonded
,
244 const bool buildSupportsNonbondedOnGpu
,
245 const bool nonbondedOnGpuIsUseful
,
246 const bool gpusWereDetected
)
248 if (nonbondedTarget
== TaskTarget::Cpu
)
250 if (!userGpuTaskAssignment
.empty())
252 GMX_THROW(InconsistentInputError(
253 "A GPU task assignment was specified, but nonbonded interactions were "
254 "assigned to the CPU. Make no more than one of these choices."));
260 if (!buildSupportsNonbondedOnGpu
&& nonbondedTarget
== TaskTarget::Gpu
)
262 GMX_THROW(InconsistentInputError(
263 "Nonbonded interactions on the GPU were requested with -nb gpu, "
264 "but the GROMACS binary has been built without GPU support. "
265 "Either run without selecting GPU options, or recompile GROMACS "
266 "with GPU support enabled"));
269 // TODO refactor all these TaskTarget::Gpu checks into one place?
270 // e.g. use a subfunction that handles only the cases where
271 // TaskTargets are not Cpu?
272 if (emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
)
274 if (nonbondedTarget
== TaskTarget::Gpu
)
276 GMX_THROW(InconsistentInputError(
277 "Nonbonded interactions on the GPU were required, which is inconsistent "
278 "with choosing emulation. Make no more than one of these choices."));
280 if (!userGpuTaskAssignment
.empty())
283 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
284 "no more than one of these choices."));
290 if (!nonbondedOnGpuIsUseful
)
292 if (nonbondedTarget
== TaskTarget::Gpu
)
294 GMX_THROW(InconsistentInputError(
295 "Nonbonded interactions on the GPU were required, but not supported for these "
296 "simulation settings. Change your settings, or do not require using GPUs."));
302 if (!userGpuTaskAssignment
.empty())
304 // Specifying -gputasks requires specifying everything.
305 if (nonbondedTarget
== TaskTarget::Auto
)
307 GMX_THROW(InconsistentInputError(
308 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
314 if (nonbondedTarget
== TaskTarget::Gpu
)
316 // We still don't know whether it is an error if no GPUs are found
317 // because we don't know the duty of this rank, yet. For example,
318 // a node with only PME ranks and -pme cpu is OK if there are not
323 // If we get here, then the user permitted GPUs, which we should
324 // use for nonbonded interactions.
325 return buildSupportsNonbondedOnGpu
&& gpusWereDetected
;
328 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded
,
329 const TaskTarget pmeTarget
,
330 const std::vector
<int>& userGpuTaskAssignment
,
331 const gmx_hw_info_t
& hardwareInfo
,
332 const t_inputrec
& inputrec
,
333 const int numRanksPerSimulation
,
334 const int numPmeRanksPerSimulation
,
335 const bool gpusWereDetected
)
337 if (pmeTarget
== TaskTarget::Cpu
)
342 if (!useGpuForNonbonded
)
344 if (pmeTarget
== TaskTarget::Gpu
)
346 GMX_THROW(NotImplementedError(
347 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
353 if (!pme_gpu_supports_build(&message
))
355 if (pmeTarget
== TaskTarget::Gpu
)
357 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
361 if (!pme_gpu_supports_hardware(hardwareInfo
, &message
))
363 if (pmeTarget
== TaskTarget::Gpu
)
365 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
369 if (!pme_gpu_supports_input(inputrec
, &message
))
371 if (pmeTarget
== TaskTarget::Gpu
)
373 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
378 if (pmeTarget
== TaskTarget::Cpu
)
380 if (!userGpuTaskAssignment
.empty())
382 GMX_THROW(InconsistentInputError(
383 "A GPU task assignment was specified, but PME interactions were "
384 "assigned to the CPU. Make no more than one of these choices."));
390 if (!userGpuTaskAssignment
.empty())
392 // Specifying -gputasks requires specifying everything.
393 if (pmeTarget
== TaskTarget::Auto
)
395 GMX_THROW(InconsistentInputError(formatString(
396 g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
402 // We still don't know whether it is an error if no GPUs are found
403 // because we don't know the duty of this rank, yet. For example,
404 // a node with only PME ranks and -pme cpu is OK if there are not
407 if (pmeTarget
== TaskTarget::Gpu
)
409 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
410 || (numPmeRanksPerSimulation
> 1))
412 GMX_THROW(NotImplementedError(
413 "PME tasks were required to run on GPUs, but that is not implemented with "
414 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
415 "or permit PME tasks to be assigned to the CPU."));
420 // If we get here, then the user permitted GPUs.
421 if (numRanksPerSimulation
== 1)
423 // PME can run well on a single GPU shared with NB when there
424 // is one rank, so we permit mdrun to try that if we have
426 return gpusWereDetected
;
429 // Not enough support for PME on GPUs for anything else
434 PmeRunMode
determinePmeRunMode(const bool useGpuForPme
, const TaskTarget
& pmeFftTarget
, const t_inputrec
& inputrec
)
436 if (!EEL_PME(inputrec
.coulombtype
))
438 return PmeRunMode::None
;
443 if (pmeFftTarget
== TaskTarget::Cpu
)
445 return PmeRunMode::Mixed
;
449 return PmeRunMode::GPU
;
454 if (pmeFftTarget
== TaskTarget::Gpu
)
457 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
458 "on CPU you should not be using -pmefft.");
460 return PmeRunMode::CPU
;
464 bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded
,
466 TaskTarget bondedTarget
,
467 const t_inputrec
& inputrec
,
468 const gmx_mtop_t
& mtop
,
469 int numPmeRanksPerSimulation
,
470 bool gpusWereDetected
)
472 if (bondedTarget
== TaskTarget::Cpu
)
477 std::string errorMessage
;
479 if (!buildSupportsGpuBondeds(&errorMessage
))
481 if (bondedTarget
== TaskTarget::Gpu
)
483 GMX_THROW(InconsistentInputError(errorMessage
.c_str()));
489 if (!inputSupportsGpuBondeds(inputrec
, mtop
, &errorMessage
))
491 if (bondedTarget
== TaskTarget::Gpu
)
493 GMX_THROW(InconsistentInputError(errorMessage
.c_str()));
499 if (!useGpuForNonbonded
)
501 if (bondedTarget
== TaskTarget::Gpu
)
503 GMX_THROW(InconsistentInputError(
504 "Bonded interactions on the GPU were required, but this requires that "
505 "short-ranged non-bonded interactions are also run on the GPU. Change "
506 "your settings, or do not require using GPUs."));
512 // TODO If the bonded kernels do not get fused, then performance
513 // overheads might suggest alternative choices here.
515 if (bondedTarget
== TaskTarget::Gpu
)
517 // We still don't know whether it is an error if no GPUs are
522 // If we get here, then the user permitted GPUs, which we should
523 // use for bonded interactions if any were detected and the CPU
524 // is busy, for which we currently only check PME or Ewald.
525 // (It would be better to dynamically assign bondeds based on timings)
526 // Note that here we assume that the auto setting of PME ranks will not
527 // choose seperate PME ranks when nonBonded are assigned to the GPU.
528 bool usingOurCpuForPmeOrEwald
=
529 (EVDW_PME(inputrec
.vdwtype
)
530 || (EEL_PME_EWALD(inputrec
.coulombtype
) && !useGpuForPme
&& numPmeRanksPerSimulation
<= 0));
532 return gpusWereDetected
&& usingOurCpuForPmeOrEwald
;
535 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition
,
536 const bool useUpdateGroups
,
537 const PmeRunMode pmeRunMode
,
538 const bool havePmeOnlyRank
,
539 const bool useGpuForNonbonded
,
540 const TaskTarget updateTarget
,
541 const bool gpusWereDetected
,
542 const t_inputrec
& inputrec
,
543 const gmx_mtop_t
& mtop
,
544 const bool useEssentialDynamics
,
545 const bool doOrientationRestraints
,
546 const bool useReplicaExchange
,
548 const DevelopmentFeatureFlags
& devFlags
,
549 const gmx::MDLogger
& mdlog
)
552 // '-update cpu' overrides the environment variable, '-update auto' does not
553 if (updateTarget
== TaskTarget::Cpu
554 || (updateTarget
== TaskTarget::Auto
&& !devFlags
.forceGpuUpdateDefault
))
559 const bool hasAnyConstraints
= gmx_mtop_interaction_count(mtop
, IF_CONSTRAINT
) > 0;
560 const bool pmeUsesCpu
= (pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::Mixed
);
562 std::string errorMessage
;
564 if (isDomainDecomposition
)
566 if (hasAnyConstraints
&& !useUpdateGroups
)
569 "Domain decomposition is only supported with constraints when update "
571 "are used. This means constraining all bonds is not supported, except for "
572 "small molecules, and box sizes close to half the pair-list cutoff are not "
578 errorMessage
+= "With domain decomposition, PME must run fully on the GPU.\n";
586 errorMessage
+= "With separate PME rank(s), PME must run fully on the GPU.\n";
592 errorMessage
+= "Multiple time stepping is not supported.\n";
595 if (inputrec
.eConstrAlg
== econtSHAKE
&& hasAnyConstraints
&& gmx_mtop_ftype_count(mtop
, F_CONSTR
) > 0)
597 errorMessage
+= "SHAKE constraints are not supported.\n";
599 // Using the GPU-version of update if:
600 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
601 // 2. Non-bonded interactions are on the GPU.
602 if ((pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::None
) && !useGpuForNonbonded
)
605 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
608 if (!gpusWereDetected
)
610 errorMessage
+= "Compatible GPUs must have been found.\n";
614 errorMessage
+= "Only a CUDA build is supported.\n";
616 if (inputrec
.eI
!= eiMD
)
618 errorMessage
+= "Only the md integrator is supported.\n";
620 if (inputrec
.etc
== etcNOSEHOOVER
)
622 errorMessage
+= "Nose-Hoover temperature coupling is not supported.\n";
624 if (!(inputrec
.epc
== epcNO
|| inputrec
.epc
== epcPARRINELLORAHMAN
625 || inputrec
.epc
== epcBERENDSEN
|| inputrec
.epc
== epcCRESCALE
))
628 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
631 if (EEL_PME_EWALD(inputrec
.coulombtype
) && inputrec
.epsilon_surface
!= 0)
633 // The graph is needed, but not supported
634 errorMessage
+= "Ewald surface correction is not supported.\n";
636 if (gmx_mtop_interaction_count(mtop
, IF_VSITE
) > 0)
638 errorMessage
+= "Virtual sites are not supported.\n";
640 if (useEssentialDynamics
)
642 errorMessage
+= "Essential dynamics is not supported.\n";
644 if (inputrec
.bPull
&& pull_have_constraint(*inputrec
.pull
))
646 errorMessage
+= "Constraints pulling is not supported.\n";
648 if (doOrientationRestraints
)
650 // The graph is needed, but not supported
651 errorMessage
+= "Orientation restraints are not supported.\n";
653 if (inputrec
.efep
!= efepNO
&& (haveFepPerturbedMasses(mtop
) || havePerturbedConstraints(mtop
)))
655 errorMessage
+= "Free energy perturbation for mass and constraints are not supported.\n";
657 const auto particleTypes
= gmx_mtop_particletype_count(mtop
);
658 if (particleTypes
[eptShell
] > 0)
660 errorMessage
+= "Shells are not supported.\n";
662 if (useReplicaExchange
)
664 errorMessage
+= "Replica exchange simulations are not supported.\n";
666 if (inputrec
.eSwapCoords
!= eswapNO
)
668 errorMessage
+= "Swapping the coordinates is not supported.\n";
672 errorMessage
+= "Re-run is not supported.\n";
675 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
676 // does not support it, the actual CUDA LINCS code does support it
677 if (gmx_mtop_ftype_count(mtop
, F_CONSTRNC
) > 0)
679 errorMessage
+= "Non-connecting constraints are not supported\n";
681 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop
))
684 "The number of coupled constraints is higher than supported in the GPU LINCS "
688 if (!errorMessage
.empty())
690 if (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
)
692 GMX_LOG(mdlog
.warning
)
695 "Update task on the GPU was required, by the "
696 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
697 "condition(s) were not satisfied:");
698 GMX_LOG(mdlog
.warning
).asParagraph().appendText(errorMessage
.c_str());
699 GMX_LOG(mdlog
.warning
).asParagraph().appendText("Will use CPU version of update.");
701 else if (updateTarget
== TaskTarget::Gpu
)
703 std::string prefix
= gmx::formatString(
704 "Update task on the GPU was required,\n"
705 "but the following condition(s) were not satisfied:\n");
706 GMX_THROW(InconsistentInputError((prefix
+ errorMessage
).c_str()));
711 return (updateTarget
== TaskTarget::Gpu
712 || (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
));
715 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags
& devFlags
,
716 bool havePPDomainDecomposition
,
717 bool useGpuForNonbonded
,
718 bool useModularSimulator
,
720 bool haveEnergyMinimization
)
722 return havePPDomainDecomposition
&& devFlags
.enableGpuHaloExchange
&& useGpuForNonbonded
723 && !useModularSimulator
&& !doRerun
&& !haveEnergyMinimization
;