src/gromacs/taskassignment/decidegpuusage.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
 * Copyright (c) 2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief Defines functionality for deciding whether tasks will run on GPUs.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_taskassignment
 */

#include "gmxpre.h"

#include "decidegpuusage.h"

#include "config.h"

#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <string>

#include "gromacs/ewald/pme.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdrunoptions.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/topology/mtop_util.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/baseversion.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"

namespace gmx
{

namespace
{

//! Helper variable to localise the text of an often repeated message.
const char* g_specifyEverythingFormatString =
        "When you use mdrun -gputasks, %s must be set to non-default "
        "values, so that the device IDs can be interpreted correctly."
#if GMX_GPU
        " If you simply want to restrict which GPUs are used, then it is "
        "better to use mdrun -gpu_id. Otherwise, setting the "
#    if GMX_GPU_CUDA
        "CUDA_VISIBLE_DEVICES"
#    elif GMX_GPU_OPENCL
        // Technically there is no portable way to do this offered by the
        // OpenCL standard, but the only current relevant case for GROMACS
        // is AMD OpenCL, which offers this variable.
        "GPU_DEVICE_ORDINAL"
#    elif GMX_GPU_SYCL
        // SYCL-TODO:
        "How to restrict visible devices in SYCL?"
#    else
#        error "Unreachable branch"
#    endif
        " environment variable in your bash profile or job "
        "script may be more convenient."
#endif
        ;

} // namespace
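
// Note on usage: the "%s" placeholder in g_specifyEverythingFormatString is filled
// in at each point of use with the set of mdrun options that must be given
// explicitly, e.g.
//
//     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")
//
// which produces the full text passed to InconsistentInputError in the functions below.
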
bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
                                                     const int                 numDevicesToUse,
                                                     const std::vector<int>&   userGpuTaskAssignment,
                                                     const EmulateGpuNonbonded emulateGpuNonbonded,
                                                     const bool                buildSupportsNonbondedOnGpu,
                                                     const bool                nonbondedOnGpuIsUseful,
                                                     const int                 numRanksPerSimulation)
{
    // First, exclude all cases where we can't run NB on GPUs.
    if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
        || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
    {
        // If the user required NB on GPUs, we issue an error later.
        return false;
    }

    // We now know that NB on GPUs makes sense, if we have any.

    if (!userGpuTaskAssignment.empty())
    {
        // Specifying -gputasks requires specifying everything.
        if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
        {
            GMX_THROW(InconsistentInputError(
                    formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
        }
        return true;
    }

    if (nonbondedTarget == TaskTarget::Gpu)
    {
        return true;
    }

    // Because this is thread-MPI, we already know about the GPUs that
    // all potential ranks can use, and can use that in a global
    // decision that will later be consistent.
    // If we get here, then the user permitted or required GPUs.
    return (numDevicesToUse > 0);
}
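
// Illustrative call sketch; the argument names stand in for values gathered during
// mdrun setup and are not defined in this file:
//
//     const bool useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi(
//             nonbondedTarget, numDevicesToUse, userGpuTaskAssignment,
//             EmulateGpuNonbonded::No, buildSupportsNonbondedOnGpu,
//             nonbondedOnGpuIsUseful, numRanksPerSimulation);
//
// A true result only records that running nonbonded work on GPUs is sensible for the
// thread-MPI rank count under consideration; the concrete device assignment happens later.
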
bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
                                               const TaskTarget        pmeTarget,
                                               const int               numDevicesToUse,
                                               const std::vector<int>& userGpuTaskAssignment,
                                               const gmx_hw_info_t&    hardwareInfo,
                                               const t_inputrec&       inputrec,
                                               const int               numRanksPerSimulation,
                                               const int               numPmeRanksPerSimulation)
{
    // First, exclude all cases where we can't run PME on GPUs.
    if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
        || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
    {
        // PME can't run on a GPU. If the user required that, we issue
        // an error later.
        return false;
    }

    // We now know that PME on GPUs might make sense, if we have any.

    if (!userGpuTaskAssignment.empty())
    {
        // Follow the user's choice of GPU task assignment, if we
        // can. Checking that their IDs are for compatible GPUs comes
        // later.

        // Specifying -gputasks requires specifying everything.
        if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
        {
            GMX_THROW(InconsistentInputError(
                    formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
        }

        // PME on GPUs is only supported in a single case
        if (pmeTarget == TaskTarget::Gpu)
        {
            if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
                || (numPmeRanksPerSimulation > 1))
            {
                GMX_THROW(InconsistentInputError(
                        "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
                        "file and use a single PME rank."));
            }
            return true;
        }

        // pmeTarget == TaskTarget::Auto
        return numRanksPerSimulation == 1;
    }

    // Because this is thread-MPI, we already know about the GPUs that
    // all potential ranks can use, and can use that in a global
    // decision that will later be consistent.

    if (pmeTarget == TaskTarget::Gpu)
    {
        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
            || (numPmeRanksPerSimulation > 1))
        {
            GMX_THROW(NotImplementedError(
                    "PME tasks were required to run on GPUs, but that is not implemented with "
                    "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
                    "or permit PME tasks to be assigned to the CPU."));
        }
        return true;
    }

    if (numRanksPerSimulation == 1)
    {
        // PME can run well on a GPU shared with NB, and we permit
        // mdrun to default to try that.
        return numDevicesToUse > 0;
    }

    if (numRanksPerSimulation < 1)
    {
        // Full automated mode for thread-MPI (the default). PME can
        // run well on a GPU shared with NB, and we permit mdrun to
        // default to it if there is only one GPU available.
        return (numDevicesToUse == 1);
    }

    // Not enough support for PME on GPUs for anything else
    return false;
}
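
// For reference, when pmeTarget is Auto the logic above resolves to:
//
//     -gputasks given :  GPU only with exactly one rank (an unset -ntmpi is an input error)
//     no -gputasks    :  1 rank     -> GPU if any compatible device was detected
//                        auto ranks -> GPU only if exactly one device was detected
//                        >1 rank    -> CPU
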
bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarget,
                                        const std::vector<int>&   userGpuTaskAssignment,
                                        const EmulateGpuNonbonded emulateGpuNonbonded,
                                        const bool                buildSupportsNonbondedOnGpu,
                                        const bool                nonbondedOnGpuIsUseful,
                                        const bool                gpusWereDetected)
{
    if (nonbondedTarget == TaskTarget::Cpu)
    {
        if (!userGpuTaskAssignment.empty())
        {
            GMX_THROW(InconsistentInputError(
                    "A GPU task assignment was specified, but nonbonded interactions were "
                    "assigned to the CPU. Make no more than one of these choices."));
        }

        return false;
    }

    if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
    {
        GMX_THROW(InconsistentInputError(
                "Nonbonded interactions on the GPU were requested with -nb gpu, "
                "but the GROMACS binary has been built without GPU support. "
                "Either run without selecting GPU options, or recompile GROMACS "
                "with GPU support enabled"));
    }

    // TODO refactor all these TaskTarget::Gpu checks into one place?
    // e.g. use a subfunction that handles only the cases where
    // TaskTargets are not Cpu?
    if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
    {
        if (nonbondedTarget == TaskTarget::Gpu)
        {
            GMX_THROW(InconsistentInputError(
                    "Nonbonded interactions on the GPU were required, which is inconsistent "
                    "with choosing emulation. Make no more than one of these choices."));
        }
        if (!userGpuTaskAssignment.empty())
        {
            GMX_THROW(
                    InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
                                           "no more than one of these choices."));
        }

        return false;
    }

    if (!nonbondedOnGpuIsUseful)
    {
        if (nonbondedTarget == TaskTarget::Gpu)
        {
            GMX_THROW(InconsistentInputError(
                    "Nonbonded interactions on the GPU were required, but not supported for these "
                    "simulation settings. Change your settings, or do not require using GPUs."));
        }

        return false;
    }

    if (!userGpuTaskAssignment.empty())
    {
        // Specifying -gputasks requires specifying everything.
        if (nonbondedTarget == TaskTarget::Auto)
        {
            GMX_THROW(InconsistentInputError(
                    formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
        }

        return true;
    }

    if (nonbondedTarget == TaskTarget::Gpu)
    {
        // We still don't know whether it is an error if no GPUs are found
        // because we don't know the duty of this rank, yet. For example,
        // a node with only PME ranks and -pme cpu is OK if there are not
        // GPUs.
        return true;
    }

    // If we get here, then the user permitted GPUs, which we should
    // use for nonbonded interactions.
    return buildSupportsNonbondedOnGpu && gpusWereDetected;
}
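
// Illustrative call sketch for the per-rank decision (argument names are placeholders
// for values known once hardware detection and user options have been processed):
//
//     const bool useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(
//             nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded,
//             buildSupportsNonbondedOnGpu, nonbondedOnGpuIsUseful, gpusWereDetected);
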
bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                  const TaskTarget        pmeTarget,
                                  const std::vector<int>& userGpuTaskAssignment,
                                  const gmx_hw_info_t&    hardwareInfo,
                                  const t_inputrec&       inputrec,
                                  const int               numRanksPerSimulation,
                                  const int               numPmeRanksPerSimulation,
                                  const bool              gpusWereDetected)
{
    if (pmeTarget == TaskTarget::Cpu)
    {
        return false;
    }

    if (!useGpuForNonbonded)
    {
        if (pmeTarget == TaskTarget::Gpu)
        {
            GMX_THROW(NotImplementedError(
                    "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
        }
        return false;
    }

    std::string message;
    if (!pme_gpu_supports_build(&message))
    {
        if (pmeTarget == TaskTarget::Gpu)
        {
            GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
        }
        return false;
    }
    if (!pme_gpu_supports_hardware(hardwareInfo, &message))
    {
        if (pmeTarget == TaskTarget::Gpu)
        {
            GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
        }
        return false;
    }
    if (!pme_gpu_supports_input(inputrec, &message))
    {
        if (pmeTarget == TaskTarget::Gpu)
        {
            GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
        }
        return false;
    }

    if (pmeTarget == TaskTarget::Cpu)
    {
        if (!userGpuTaskAssignment.empty())
        {
            GMX_THROW(InconsistentInputError(
                    "A GPU task assignment was specified, but PME interactions were "
                    "assigned to the CPU. Make no more than one of these choices."));
        }

        return false;
    }

    if (!userGpuTaskAssignment.empty())
    {
        // Specifying -gputasks requires specifying everything.
        if (pmeTarget == TaskTarget::Auto)
        {
            GMX_THROW(InconsistentInputError(formatString(
                    g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
        }

        return true;
    }

    // We still don't know whether it is an error if no GPUs are found
    // because we don't know the duty of this rank, yet. For example,
    // a node with only PME ranks and -pme cpu is OK if there are not
    // GPUs.

    if (pmeTarget == TaskTarget::Gpu)
    {
        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
            || (numPmeRanksPerSimulation > 1))
        {
            GMX_THROW(NotImplementedError(
                    "PME tasks were required to run on GPUs, but that is not implemented with "
                    "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
                    "or permit PME tasks to be assigned to the CPU."));
        }
        return true;
    }

    // If we get here, then the user permitted GPUs.
    if (numRanksPerSimulation == 1)
    {
        // PME can run well on a single GPU shared with NB when there
        // is one rank, so we permit mdrun to try that if we have
        // detected GPUs.
        return gpusWereDetected;
    }

    // Not enough support for PME on GPUs for anything else
    return false;
}
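
// Illustrative call sketch (argument names are placeholders for values known at mdrun
// setup time):
//
//     const bool useGpuForPme = decideWhetherToUseGpusForPme(
//             useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, hardwareInfo,
//             inputrec, numRanksPerSimulation, numPmeRanksPerSimulation, gpusWereDetected);
//
// Unlike the thread-MPI variant, this overload does not know how many devices each rank
// can use, so under -pme auto it defaults to the GPU only for single-rank runs in which
// a compatible device was detected.
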
PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
{
    if (!EEL_PME(inputrec.coulombtype))
    {
        return PmeRunMode::None;
    }

    if (useGpuForPme)
    {
        if (pmeFftTarget == TaskTarget::Cpu)
        {
            return PmeRunMode::Mixed;
        }
        else
        {
            return PmeRunMode::GPU;
        }
    }
    else
    {
        if (pmeFftTarget == TaskTarget::Gpu)
        {
            gmx_fatal(FARGS,
                      "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
                      "on CPU you should not be using -pmefft.");
        }
        return PmeRunMode::CPU;
    }
}
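
// For reference, the mapping implemented above (given that EEL_PME is active):
//
//     useGpuForPme   pmeFftTarget      result
//     false          Cpu or Auto       PmeRunMode::CPU
//     false          Gpu               fatal error (-pmefft gpu requires -pme gpu)
//     true           Cpu               PmeRunMode::Mixed
//     true           Gpu or Auto       PmeRunMode::GPU
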
bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
                                     const bool       useGpuForPme,
                                     const TaskTarget bondedTarget,
                                     const bool       canUseGpuForBonded,
                                     const bool       usingLJPme,
                                     const bool       usingElecPmeOrEwald,
                                     const int        numPmeRanksPerSimulation,
                                     const bool       gpusWereDetected)
{
    if (bondedTarget == TaskTarget::Cpu)
    {
        return false;
    }

    if (!canUseGpuForBonded)
    {
        if (bondedTarget == TaskTarget::Gpu)
        {
            GMX_THROW(InconsistentInputError(
                    "Bonded interactions on the GPU were required, but not supported for these "
                    "simulation settings. Change your settings, or do not require using GPUs."));
        }

        return false;
    }

    if (!useGpuForNonbonded)
    {
        if (bondedTarget == TaskTarget::Gpu)
        {
            GMX_THROW(InconsistentInputError(
                    "Bonded interactions on the GPU were required, but this requires that "
                    "short-ranged non-bonded interactions are also run on the GPU. Change "
                    "your settings, or do not require using GPUs."));
        }

        return false;
    }

    // TODO If the bonded kernels do not get fused, then performance
    // overheads might suggest alternative choices here.

    if (bondedTarget == TaskTarget::Gpu)
    {
        // We still don't know whether it is an error if no GPUs are
        // found.
        return true;
    }

    // If we get here, then the user permitted GPUs, which we should
    // use for bonded interactions if any were detected and the CPU
    // is busy, for which we currently only check PME or Ewald.
    // (It would be better to dynamically assign bondeds based on timings)
    // Note that here we assume that the auto setting of PME ranks will not
    // choose separate PME ranks when nonBonded are assigned to the GPU.
    bool usingOurCpuForPmeOrEwald =
            (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));

    return gpusWereDetected && usingOurCpuForPmeOrEwald;
}
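
// Illustrative call sketch; the usingLJPme and usingElecPmeOrEwald flags would typically
// be derived from the input record's Van der Waals and Coulomb interaction types
// (all names other than the function are placeholders):
//
//     const bool useGpuForBonded = decideWhetherToUseGpusForBonded(
//             useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
//             usingLJPme, usingElecPmeOrEwald, numPmeRanksPerSimulation, gpusWereDetected);
//
// With -bonded auto, bondeds are offloaded only when the CPU is otherwise expected to be
// busy with PME or Ewald work, per the heuristic above.
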
bool decideWhetherToUseGpuForUpdate(const bool                     isDomainDecomposition,
                                    const bool                     useUpdateGroups,
                                    const PmeRunMode               pmeRunMode,
                                    const bool                     havePmeOnlyRank,
                                    const bool                     useGpuForNonbonded,
                                    const TaskTarget               updateTarget,
                                    const bool                     gpusWereDetected,
                                    const t_inputrec&              inputrec,
                                    const gmx_mtop_t&              mtop,
                                    const bool                     useEssentialDynamics,
                                    const bool                     doOrientationRestraints,
                                    const bool                     useReplicaExchange,
                                    const bool                     doRerun,
                                    const DevelopmentFeatureFlags& devFlags,
                                    const gmx::MDLogger&           mdlog)
{
    // '-update cpu' overrides the environment variable, '-update auto' does not
    if (updateTarget == TaskTarget::Cpu
        || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
    {
        return false;
    }

    const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
    const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);

    std::string errorMessage;

    if (isDomainDecomposition)
    {
        if (!devFlags.enableGpuHaloExchange)
        {
            errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
        }
        else
        {
            if (hasAnyConstraints && !useUpdateGroups)
            {
                errorMessage +=
                        "Domain decomposition is only supported with constraints when update "
                        "groups are used. This means constraining all bonds is not supported, "
                        "except for small molecules, and box sizes close to half the pair-list "
                        "cutoff are not supported.\n ";
            }

            if (pmeUsesCpu)
            {
                errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
            }
        }
    }

    if (havePmeOnlyRank)
    {
        if (pmeUsesCpu)
        {
            errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
        }

        if (!devFlags.enableGpuPmePPComm)
        {
            errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
        }
    }

    if (inputrec.useMts)
    {
        errorMessage += "Multiple time stepping is not supported.\n";
    }

    if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
    {
        errorMessage += "SHAKE constraints are not supported.\n";
    }
    // Using the GPU-version of update if:
    // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
    // 2. Non-bonded interactions are on the GPU.
    if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
    {
        errorMessage +=
                "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
    }

    if (!gpusWereDetected)
    {
        errorMessage += "Compatible GPUs must have been found.\n";
    }
    if (!GMX_GPU_CUDA)
    {
        errorMessage += "Only a CUDA build is supported.\n";
    }
    if (inputrec.eI != eiMD)
    {
        errorMessage += "Only the md integrator is supported.\n";
    }
    if (inputrec.etc == etcNOSEHOOVER)
    {
        errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
    }
    if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN
          || inputrec.epc == epcBERENDSEN || inputrec.epc == epcCRESCALE))
    {
        errorMessage +=
                "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
                "supported.\n";
    }
    if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
    {
        // The graph is needed, but not supported
        errorMessage += "Ewald surface correction is not supported.\n";
    }
    if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
    {
        errorMessage += "Virtual sites are not supported.\n";
    }
    if (useEssentialDynamics)
    {
        errorMessage += "Essential dynamics is not supported.\n";
    }
    if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
    {
        errorMessage += "Constraints pulling is not supported.\n";
    }
    if (doOrientationRestraints)
    {
        // The graph is needed, but not supported
        errorMessage += "Orientation restraints are not supported.\n";
    }
    if (inputrec.efep != efepNO
        && (haveFreeEnergyType(inputrec, efptBONDED) || haveFreeEnergyType(inputrec, efptMASS)))
    {
        errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
    }
    const auto particleTypes = gmx_mtop_particletype_count(mtop);
    if (particleTypes[eptShell] > 0)
    {
        errorMessage += "Shells are not supported.\n";
    }
    if (useReplicaExchange)
    {
        errorMessage += "Replica exchange simulations are not supported.\n";
    }
    if (inputrec.eSwapCoords != eswapNO)
    {
        errorMessage += "Swapping the coordinates is not supported.\n";
    }
    if (doRerun)
    {
        errorMessage += "Re-run is not supported.\n";
    }

    // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
    // does not support it, the actual CUDA LINCS code does support it
    if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
    {
        errorMessage += "Non-connecting constraints are not supported\n";
    }
    if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
    {
        errorMessage +=
                "The number of coupled constraints is higher than supported in the GPU LINCS "
                "code.\n";
    }

    if (!errorMessage.empty())
    {
        if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
        {
            GMX_LOG(mdlog.warning)
                    .asParagraph()
                    .appendText(
                            "Update task on the GPU was required, by the "
                            "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
                            "condition(s) were not satisfied:");
            GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
            GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
        }
        else if (updateTarget == TaskTarget::Gpu)
        {
            std::string prefix = gmx::formatString(
                    "Update task on the GPU was required,\n"
                    "but the following condition(s) were not satisfied:\n");
            GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
        }
        return false;
    }

    return (updateTarget == TaskTarget::Gpu
            || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
}
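
// Illustrative call sketch (argument names are placeholders). Because the function throws
// InconsistentInputError when '-update gpu' was requested but one of the checks above
// failed, callers normally let that propagate as a user-facing error:
//
//     const bool useGpuForUpdate = decideWhetherToUseGpuForUpdate(
//             haveDomainDecomposition, useUpdateGroups, pmeRunMode, havePmeOnlyRank,
//             useGpuForNonbonded, updateTarget, gpusWereDetected, inputrec, mtop,
//             useEssentialDynamics, doOrientationRestraints, useReplicaExchange,
//             doRerun, devFlags, mdlog);
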
bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
                                  bool                           havePPDomainDecomposition,
                                  bool                           useGpuForNonbonded,
                                  bool                           useModularSimulator,
                                  bool                           doRerun,
                                  bool                           haveEnergyMinimization)
{
    return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
           && !useModularSimulator && !doRerun && !haveEnergyMinimization;
}

} // namespace gmx