2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdlib/update_constrain_cuda.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/mdtypes/inputrec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/mdtypes/mdrunoptions.h"
65 #include "gromacs/pulling/pull.h"
66 #include "gromacs/taskassignment/taskassignment.h"
67 #include "gromacs/topology/mtop_util.h"
68 #include "gromacs/topology/topology.h"
69 #include "gromacs/utility/baseversion.h"
70 #include "gromacs/utility/exceptions.h"
71 #include "gromacs/utility/fatalerror.h"
72 #include "gromacs/utility/gmxassert.h"
73 #include "gromacs/utility/logger.h"
74 #include "gromacs/utility/stringutil.h"
83 //! Helper variable to localise the text of an often repeated message.
84 const char* g_specifyEverythingFormatString
=
85 "When you use mdrun -gputasks, %s must be set to non-default "
86 "values, so that the device IDs can be interpreted correctly."
87 #if GMX_GPU != GMX_GPU_NONE
88 " If you simply want to restrict which GPUs are used, then it is "
89 "better to use mdrun -gpu_id. Otherwise, setting the "
90 # if GMX_GPU == GMX_GPU_CUDA
91 "CUDA_VISIBLE_DEVICES"
92 # elif GMX_GPU == GMX_GPU_OPENCL
93 // Technically there is no portable way to do this offered by the
94 // OpenCL standard, but the only current relevant case for GROMACS
95 // is AMD OpenCL, which offers this variable.
98 # error "Unreachable branch"
100 " environment variable in your bash profile or job "
101 "script may be more convenient."
107 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget
,
108 const std::vector
<int>& gpuIdsToUse
,
109 const std::vector
<int>& userGpuTaskAssignment
,
110 const EmulateGpuNonbonded emulateGpuNonbonded
,
111 const bool buildSupportsNonbondedOnGpu
,
112 const bool nonbondedOnGpuIsUseful
,
113 const int numRanksPerSimulation
)
115 // First, exclude all cases where we can't run NB on GPUs.
116 if (nonbondedTarget
== TaskTarget::Cpu
|| emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
117 || !nonbondedOnGpuIsUseful
|| !buildSupportsNonbondedOnGpu
)
119 // If the user required NB on GPUs, we issue an error later.
123 // We now know that NB on GPUs makes sense, if we have any.
125 if (!userGpuTaskAssignment
.empty())
127 // Specifying -gputasks requires specifying everything.
128 if (nonbondedTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
130 GMX_THROW(InconsistentInputError(
131 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
136 if (nonbondedTarget
== TaskTarget::Gpu
)
141 // Because this is thread-MPI, we already know about the GPUs that
142 // all potential ranks can use, and can use that in a global
143 // decision that will later be consistent.
144 auto haveGpus
= !gpuIdsToUse
.empty();
146 // If we get here, then the user permitted or required GPUs.
150 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded
,
151 const TaskTarget pmeTarget
,
152 const std::vector
<int>& gpuIdsToUse
,
153 const std::vector
<int>& userGpuTaskAssignment
,
154 const gmx_hw_info_t
& hardwareInfo
,
155 const t_inputrec
& inputrec
,
156 const gmx_mtop_t
& mtop
,
157 const int numRanksPerSimulation
,
158 const int numPmeRanksPerSimulation
)
160 // First, exclude all cases where we can't run PME on GPUs.
161 if ((pmeTarget
== TaskTarget::Cpu
) || !useGpuForNonbonded
|| !pme_gpu_supports_build(nullptr)
162 || !pme_gpu_supports_hardware(hardwareInfo
, nullptr)
163 || !pme_gpu_supports_input(inputrec
, mtop
, nullptr))
165 // PME can't run on a GPU. If the user required that, we issue
170 // We now know that PME on GPUs might make sense, if we have any.
172 if (!userGpuTaskAssignment
.empty())
174 // Follow the user's choice of GPU task assignment, if we
175 // can. Checking that their IDs are for compatible GPUs comes
178 // Specifying -gputasks requires specifying everything.
179 if (pmeTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
181 GMX_THROW(InconsistentInputError(
182 formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi")));
185 // PME on GPUs is only supported in a single case
186 if (pmeTarget
== TaskTarget::Gpu
)
188 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
189 || (numPmeRanksPerSimulation
> 1))
191 GMX_THROW(InconsistentInputError(
192 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
193 "file and use a single PME rank."));
198 // pmeTarget == TaskTarget::Auto
199 return numRanksPerSimulation
== 1;
202 // Because this is thread-MPI, we already know about the GPUs that
203 // all potential ranks can use, and can use that in a global
204 // decision that will later be consistent.
206 if (pmeTarget
== TaskTarget::Gpu
)
208 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
209 || (numPmeRanksPerSimulation
> 1))
211 GMX_THROW(NotImplementedError(
212 "PME tasks were required to run on GPUs, but that is not implemented with "
213 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
214 "or permit PME tasks to be assigned to the CPU."));
219 if (numRanksPerSimulation
== 1)
221 // PME can run well on a GPU shared with NB, and we permit
222 // mdrun to default to try that.
223 return !gpuIdsToUse
.empty();
226 if (numRanksPerSimulation
< 1)
228 // Full automated mode for thread-MPI (the default). PME can
229 // run well on a GPU shared with NB, and we permit mdrun to
230 // default to it if there is only one GPU available.
231 return (gpuIdsToUse
.size() == 1);
234 // Not enough support for PME on GPUs for anything else
238 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget
,
239 const std::vector
<int>& userGpuTaskAssignment
,
240 const EmulateGpuNonbonded emulateGpuNonbonded
,
241 const bool buildSupportsNonbondedOnGpu
,
242 const bool nonbondedOnGpuIsUseful
,
243 const bool gpusWereDetected
)
245 if (nonbondedTarget
== TaskTarget::Cpu
)
247 if (!userGpuTaskAssignment
.empty())
249 GMX_THROW(InconsistentInputError(
250 "A GPU task assignment was specified, but nonbonded interactions were "
251 "assigned to the CPU. Make no more than one of these choices."));
257 if (!buildSupportsNonbondedOnGpu
&& nonbondedTarget
== TaskTarget::Gpu
)
259 GMX_THROW(InconsistentInputError(
260 "Nonbonded interactions on the GPU were requested with -nb gpu, "
261 "but the GROMACS binary has been built without GPU support. "
262 "Either run without selecting GPU options, or recompile GROMACS "
263 "with GPU support enabled"));
266 // TODO refactor all these TaskTarget::Gpu checks into one place?
267 // e.g. use a subfunction that handles only the cases where
268 // TaskTargets are not Cpu?
269 if (emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
)
271 if (nonbondedTarget
== TaskTarget::Gpu
)
273 GMX_THROW(InconsistentInputError(
274 "Nonbonded interactions on the GPU were required, which is inconsistent "
275 "with choosing emulation. Make no more than one of these choices."));
277 if (!userGpuTaskAssignment
.empty())
280 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
281 "no more than one of these choices."));
287 if (!nonbondedOnGpuIsUseful
)
289 if (nonbondedTarget
== TaskTarget::Gpu
)
291 GMX_THROW(InconsistentInputError(
292 "Nonbonded interactions on the GPU were required, but not supported for these "
293 "simulation settings. Change your settings, or do not require using GPUs."));
299 if (!userGpuTaskAssignment
.empty())
301 // Specifying -gputasks requires specifying everything.
302 if (nonbondedTarget
== TaskTarget::Auto
)
304 GMX_THROW(InconsistentInputError(
305 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
311 if (nonbondedTarget
== TaskTarget::Gpu
)
313 // We still don't know whether it is an error if no GPUs are found
314 // because we don't know the duty of this rank, yet. For example,
315 // a node with only PME ranks and -pme cpu is OK if there are not
320 // If we get here, then the user permitted GPUs, which we should
321 // use for nonbonded interactions.
322 return gpusWereDetected
;
325 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded
,
326 const TaskTarget pmeTarget
,
327 const std::vector
<int>& userGpuTaskAssignment
,
328 const gmx_hw_info_t
& hardwareInfo
,
329 const t_inputrec
& inputrec
,
330 const gmx_mtop_t
& mtop
,
331 const int numRanksPerSimulation
,
332 const int numPmeRanksPerSimulation
,
333 const bool gpusWereDetected
)
335 if (pmeTarget
== TaskTarget::Cpu
)
340 if (!useGpuForNonbonded
)
342 if (pmeTarget
== TaskTarget::Gpu
)
344 GMX_THROW(NotImplementedError(
345 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
351 if (!pme_gpu_supports_build(&message
))
353 if (pmeTarget
== TaskTarget::Gpu
)
355 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
359 if (!pme_gpu_supports_hardware(hardwareInfo
, &message
))
361 if (pmeTarget
== TaskTarget::Gpu
)
363 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
367 if (!pme_gpu_supports_input(inputrec
, mtop
, &message
))
369 if (pmeTarget
== TaskTarget::Gpu
)
371 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
376 if (pmeTarget
== TaskTarget::Cpu
)
378 if (!userGpuTaskAssignment
.empty())
380 GMX_THROW(InconsistentInputError(
381 "A GPU task assignment was specified, but PME interactions were "
382 "assigned to the CPU. Make no more than one of these choices."));
388 if (!userGpuTaskAssignment
.empty())
390 // Specifying -gputasks requires specifying everything.
391 if (pmeTarget
== TaskTarget::Auto
)
393 GMX_THROW(InconsistentInputError(formatString(
394 g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
400 // We still don't know whether it is an error if no GPUs are found
401 // because we don't know the duty of this rank, yet. For example,
402 // a node with only PME ranks and -pme cpu is OK if there are not
405 if (pmeTarget
== TaskTarget::Gpu
)
407 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
408 || (numPmeRanksPerSimulation
> 1))
410 GMX_THROW(NotImplementedError(
411 "PME tasks were required to run on GPUs, but that is not implemented with "
412 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
413 "or permit PME tasks to be assigned to the CPU."));
418 // If we get here, then the user permitted GPUs.
419 if (numRanksPerSimulation
== 1)
421 // PME can run well on a single GPU shared with NB when there
422 // is one rank, so we permit mdrun to try that if we have
424 return gpusWereDetected
;
427 // Not enough support for PME on GPUs for anything else
432 PmeRunMode
determinePmeRunMode(const bool useGpuForPme
, const TaskTarget
& pmeFftTarget
, const t_inputrec
& inputrec
)
434 if (!EEL_PME(inputrec
.coulombtype
))
436 return PmeRunMode::None
;
441 if (pmeFftTarget
== TaskTarget::Cpu
)
443 return PmeRunMode::Mixed
;
447 return PmeRunMode::GPU
;
452 if (pmeFftTarget
== TaskTarget::Gpu
)
455 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
456 "on CPU you should not be using -pmefft.");
458 return PmeRunMode::CPU
;
462 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded
,
463 const bool useGpuForPme
,
464 const TaskTarget bondedTarget
,
465 const bool canUseGpuForBonded
,
466 const bool usingLJPme
,
467 const bool usingElecPmeOrEwald
,
468 const int numPmeRanksPerSimulation
,
469 const bool gpusWereDetected
)
471 if (bondedTarget
== TaskTarget::Cpu
)
476 if (!canUseGpuForBonded
)
478 if (bondedTarget
== TaskTarget::Gpu
)
480 GMX_THROW(InconsistentInputError(
481 "Bonded interactions on the GPU were required, but not supported for these "
482 "simulation settings. Change your settings, or do not require using GPUs."));
488 if (!useGpuForNonbonded
)
490 if (bondedTarget
== TaskTarget::Gpu
)
492 GMX_THROW(InconsistentInputError(
493 "Bonded interactions on the GPU were required, but this requires that "
494 "short-ranged non-bonded interactions are also run on the GPU. Change "
495 "your settings, or do not require using GPUs."));
501 // TODO If the bonded kernels do not get fused, then performance
502 // overheads might suggest alternative choices here.
504 if (bondedTarget
== TaskTarget::Gpu
)
506 // We still don't know whether it is an error if no GPUs are
511 // If we get here, then the user permitted GPUs, which we should
512 // use for bonded interactions if any were detected and the CPU
513 // is busy, for which we currently only check PME or Ewald.
514 // (It would be better to dynamically assign bondeds based on timings)
515 // Note that here we assume that the auto setting of PME ranks will not
516 // choose seperate PME ranks when nonBonded are assigned to the GPU.
517 bool usingOurCpuForPmeOrEwald
=
518 (usingLJPme
|| (usingElecPmeOrEwald
&& !useGpuForPme
&& numPmeRanksPerSimulation
<= 0));
520 return gpusWereDetected
&& usingOurCpuForPmeOrEwald
;
523 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition
,
524 const bool useUpdateGroups
,
525 const PmeRunMode pmeRunMode
,
526 const bool havePmeOnlyRank
,
527 const bool useGpuForNonbonded
,
528 const TaskTarget updateTarget
,
529 const bool gpusWereDetected
,
530 const t_inputrec
& inputrec
,
531 const gmx_mtop_t
& mtop
,
532 const bool useEssentialDynamics
,
533 const bool doOrientationRestraints
,
534 const bool useReplicaExchange
,
536 const DevelopmentFeatureFlags
& devFlags
,
537 const gmx::MDLogger
& mdlog
)
540 // '-update cpu' overrides the environment variable, '-update auto' does not
541 if (updateTarget
== TaskTarget::Cpu
542 || (updateTarget
== TaskTarget::Auto
&& !devFlags
.forceGpuUpdateDefault
))
547 const bool hasAnyConstraints
= gmx_mtop_interaction_count(mtop
, IF_CONSTRAINT
) > 0;
548 const bool pmeUsesCpu
= (pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::Mixed
);
550 std::string errorMessage
;
552 if (isDomainDecomposition
)
554 if (!devFlags
.enableGpuHaloExchange
)
556 errorMessage
+= "Domain decomposition without GPU halo exchange is not supported.\n ";
560 if (hasAnyConstraints
&& !useUpdateGroups
)
563 "Domain decomposition is only supported with constraints when update "
565 "are used. This means constraining all bonds is not supported, except for "
566 "small molecules, and box sizes close to half the pair-list cutoff are not "
572 errorMessage
+= "With domain decomposition, PME must run fully on the GPU.\n";
581 errorMessage
+= "With separate PME rank(s), PME must run fully on the GPU.\n";
584 if (!devFlags
.enableGpuPmePPComm
)
586 errorMessage
+= "With separate PME rank(s), PME must use direct communication.\n";
590 if (inputrec
.eConstrAlg
== econtSHAKE
&& hasAnyConstraints
&& gmx_mtop_ftype_count(mtop
, F_CONSTR
) > 0)
592 errorMessage
+= "SHAKE constraints are not supported.\n";
594 // Using the GPU-version of update if:
595 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
596 // 2. Non-bonded interactions are on the GPU.
597 if ((pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::None
) && !useGpuForNonbonded
)
600 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
603 if (!gpusWereDetected
)
605 errorMessage
+= "Compatible GPUs must have been found.\n";
607 if (GMX_GPU
!= GMX_GPU_CUDA
)
609 errorMessage
+= "Only a CUDA build is supported.\n";
611 if (inputrec
.eI
!= eiMD
)
613 errorMessage
+= "Only the md integrator is supported.\n";
615 if (inputrec
.etc
== etcNOSEHOOVER
)
617 errorMessage
+= "Nose-Hoover temperature coupling is not supported.\n";
619 if (!(inputrec
.epc
== epcNO
|| inputrec
.epc
== epcPARRINELLORAHMAN
|| inputrec
.epc
== epcBERENDSEN
))
621 errorMessage
+= "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
623 if (EEL_PME_EWALD(inputrec
.coulombtype
) && inputrec
.epsilon_surface
!= 0)
625 // The graph is needed, but not supported
626 errorMessage
+= "Ewald surface correction is not supported.\n";
628 if (gmx_mtop_interaction_count(mtop
, IF_VSITE
) > 0)
630 errorMessage
+= "Virtual sites are not supported.\n";
632 if (useEssentialDynamics
)
634 errorMessage
+= "Essential dynamics is not supported.\n";
636 if (inputrec
.bPull
&& pull_have_constraint(inputrec
.pull
))
638 errorMessage
+= "Constraints pulling is not supported.\n";
640 if (doOrientationRestraints
)
642 // The graph is needed, but not supported
643 errorMessage
+= "Orientation restraints are not supported.\n";
645 if (inputrec
.efep
!= efepNO
)
647 // Actually all free-energy options except for mass and constraint perturbation are supported
648 errorMessage
+= "Free energy perturbations are not supported.\n";
650 const auto particleTypes
= gmx_mtop_particletype_count(mtop
);
651 if (particleTypes
[eptShell
] > 0)
653 errorMessage
+= "Shells are not supported.\n";
655 if (useReplicaExchange
)
657 errorMessage
+= "Replica exchange simulations are not supported.\n";
659 if (inputrec
.eSwapCoords
!= eswapNO
)
661 errorMessage
+= "Swapping the coordinates is not supported.\n";
665 errorMessage
+= "Re-run is not supported.\n";
668 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
669 // does not support it, the actual CUDA LINCS code does support it
670 if (gmx_mtop_ftype_count(mtop
, F_CONSTRNC
) > 0)
672 errorMessage
+= "Non-connecting constraints are not supported\n";
674 if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop
))
677 "The number of coupled constraints is higher than supported in the CUDA LINCS "
681 if (!errorMessage
.empty())
683 if (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
)
685 GMX_LOG(mdlog
.warning
)
688 "Update task on the GPU was required, by the "
689 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
690 "condition(s) were not satisfied:");
691 GMX_LOG(mdlog
.warning
).asParagraph().appendText(errorMessage
.c_str());
692 GMX_LOG(mdlog
.warning
).asParagraph().appendText("Will use CPU version of update.");
694 else if (updateTarget
== TaskTarget::Gpu
)
696 std::string prefix
= gmx::formatString(
697 "Update task on the GPU was required,\n"
698 "but the following condition(s) were not satisfied:\n");
699 GMX_THROW(InconsistentInputError((prefix
+ errorMessage
).c_str()));
704 return (updateTarget
== TaskTarget::Gpu
705 || (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
));