src/gromacs/hardware/hardwareassign.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "hardwareassign.h"
  38
  39 #include "config.h"
  40
  41 #include <cstring>
  42
  43 #include <algorithm>
  44 #include <string>
  45
  46 #include "gromacs/gmxlib/network.h"
  47 #include "gromacs/gpu_utils/gpu_utils.h"
  48 #include "gromacs/hardware/detecthardware.h"
  49 #include "gromacs/hardware/gpu_hw_info.h"
  50 #include "gromacs/mdtypes/commrec.h"
  51 #include "gromacs/utility/basenetwork.h"
  52 #include "gromacs/utility/exceptions.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/gmxassert.h"
  55 #include "gromacs/utility/logger.h"
  56 #include "gromacs/utility/programcontext.h"
  57 #include "gromacs/utility/smalloc.h"
  58 #include "gromacs/utility/stringutil.h"
  59 #include "gromacs/utility/sysinfo.h"
  60
  61 #define HOSTNAMELEN 80
  62
  63 /*! \internal \brief
  64  * Prints GPU information strings on this node into the stderr and log.
  65  * Only used for logging errors in heterogenous MPI configurations.
  66  */
  67 static void print_gpu_detection_stats(const gmx::MDLogger  &mdlog,
  68                                       const gmx_gpu_info_t *gpu_info)
  69 {
  70     char onhost[HOSTNAMELEN+10];
  71     int  ngpu;
  72
  73     if (!gpu_info->bDetectGPUs)
  74     {
  75         /* We skipped the detection, so don't print detection stats */
  76         return;
  77     }
  78
  79     ngpu = gpu_info->n_dev;
  80
  81     /* We only print the detection on one, of possibly multiple, nodes */
  82     std::strncpy(onhost, " on host ", 10);
  83     gmx_gethostname(onhost + 9, HOSTNAMELEN);
  84
  85     if (ngpu > 0)
  86     {
  87         std::string gpuDesc = sprint_gpus(gpu_info);
  88         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
  89                 "%d GPU%s detected%s:\n%s",
  90                 ngpu, (ngpu > 1) ? "s" : "", onhost, gpuDesc.c_str());
  91     }
  92     else
  93     {
  94         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("No GPUs detected%s", onhost);
  95     }
  96     // FIXME: This currently only logs on the master rank, which defeats the purpose.
  97     // A new MDLogger option is required for printing to stderr on all ranks.
  98     // There is also a question of MPI reduction of the outputs, see Redmine issue #1505.
  99 }
 100
 101 /*! \internal \brief
 102  * This function is responsible for mapping the GPUs to the processes on a single node
 103  * (filling the gpu_opt->dev_use array).
 104  *
 105  * \param[in,out]    gpu_opt              Input/output GPU assignment data.
 106  * \param[in]        nrank                Number of PP GPU ranks on the node.
 107  * \param[in]        rank                 Index of PP GPU rank on the node.
 108  *
 109  * This selects the GPUs we will use. This is an operation local to each physical node.
 110  * If we have less MPI ranks than GPUs, we will waste some GPUs.
 111  */
 112 static void assign_rank_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
 113 {
 114     GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
 115     GMX_RELEASE_ASSERT(nrank >= 1,
 116                        gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
 117                                          rank, gpu_opt->n_dev_compatible).c_str());
 118
 119     if (gpu_opt->n_dev_compatible == 0)
 120     {
 121         char host[HOSTNAMELEN];
 122
 123         gmx_gethostname(host, HOSTNAMELEN);
 124         gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
 125     }
 126
 127     int nshare;
 128
 129     nshare = 1;
 130     if (nrank > gpu_opt->n_dev_compatible)
 131     {
 132         if (nrank % gpu_opt->n_dev_compatible == 0)
 133         {
 134             nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
 135         }
 136         else
 137         {
 138             if (rank == 0)
 139             {
 140                 gmx_fatal(FARGS, "The number of MPI ranks (%d) in a physical node is not a multiple of the number of GPUs (%d). Select a different number of MPI ranks or use the -gpu_id option to manually specify the GPU to be used.",
 141                           nrank, gpu_opt->n_dev_compatible);
 142             }
 143
 144 #if GMX_MPI
 145             /* We use a global barrier to prevent ranks from continuing with
 146              * an invalid setup.
 147              */
 148             MPI_Barrier(MPI_COMM_WORLD);
 149 #endif
 150         }
 151     }
 152
 153     /* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
 154     gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
 155     if (!gmx_multiple_gpu_per_node_supported())
 156     {
 157         gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
 158     }
 159     snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
 160     for (int i = 0; i != gpu_opt->n_dev_use; ++i)
 161     {
 162         /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
 163         gpu_opt->dev_use[i] = gpu_opt->dev_compatible[i/nshare];
 164     }
 165 }
 166
 167 /*! \brief Return whether all selected GPUs are compatible.
 168  *
 169  * Given the list of selected GPU device IDs in \c gpu_opt and
 170  * detected GPUs in \c gpu_info, return whether all selected GPUs are
 171  * compatible. If not, place a suitable string in \c errorMessage.
 172  *
 173  * \param[in]   gpu_info      pointer to structure holding GPU information
 174  * \param[in]   gpu_opt       pointer to structure holding GPU options
 175  * \param[out]  errorMessage  pointer to string to hold a possible error message (is not updated when returning true)
 176  * \returns                   true if every requested GPU is compatible
 177  */
 178 static bool checkGpuSelection(const gmx_gpu_info_t *gpu_info,
 179                               const gmx_gpu_opt_t  *gpu_opt,
 180                               std::string          *errorMessage)
 181 {
 182     GMX_ASSERT(gpu_info, "Invalid gpu_info");
 183
 184     bool        allOK   = true;
 185     std::string message = "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
 186     for (int i = 0; i < gpu_opt->n_dev_use; i++)
 187     {
 188         GMX_ASSERT(gpu_opt, "Invalid gpu_opt");
 189         GMX_ASSERT(gpu_opt->dev_use, "Invalid gpu_opt->dev_use");
 190
 191         int id     = gpu_opt->dev_use[i];
 192         if (!isGpuCompatible(gpu_info, id))
 193         {
 194             allOK    = false;
 195             message += gmx::formatString("    GPU #%d: %s\n",
 196                                          id,
 197                                          getGpuCompatibilityDescription(gpu_info, id));
 198         }
 199     }
 200     if (!allOK && errorMessage)
 201     {
 202         *errorMessage = message;
 203     }
 204     return allOK;
 205 }
 206
 207 /*! \brief Select the compatible GPUs
 208  *
 209  * This function filters gpu_info->gpu_dev for compatible gpus based
 210  * on the previously run compatibility tests. Sets
 211  * gpu_info->dev_compatible and gpu_info->n_dev_compatible.
 212  *
 213  * \param[in]     gpu_info    pointer to structure holding GPU information
 214  * \param[out]    gpu_opt     pointer to structure holding GPU options
 215  */
 216 static void pickCompatibleGpus(const gmx_gpu_info_t *gpu_info,
 217                                gmx_gpu_opt_t        *gpu_opt)
 218 {
 219     GMX_ASSERT(gpu_info, "Invalid gpu_info");
 220     GMX_ASSERT(gpu_opt, "Invalid gpu_opt");
 221
 222     // Possible minor over-allocation here, but not important for anything
 223     gpu_opt->n_dev_compatible = 0;
 224     snew(gpu_opt->dev_compatible, gpu_info->n_dev);
 225     for (int i = 0; i < gpu_info->n_dev; i++)
 226     {
 227         GMX_ASSERT(gpu_info->gpu_dev, "Invalid gpu_info->gpu_dev");
 228         if (isGpuCompatible(gpu_info, i))
 229         {
 230             gpu_opt->dev_compatible[gpu_opt->n_dev_compatible] = i;
 231             gpu_opt->n_dev_compatible++;
 232         }
 233     }
 234 }
 235
 236 void gmx_select_rank_gpu_ids(const gmx::MDLogger &mdlog, const t_commrec *cr,
 237                              const gmx_gpu_info_t *gpu_info,
 238                              gmx_bool bForceUseGPU,
 239                              gmx_gpu_opt_t *gpu_opt)
 240 {
 241     /* Bail if binary is not compiled with GPU acceleration, but this is either
 242      * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
 243     if (bForceUseGPU && (GMX_GPU == GMX_GPU_NONE))
 244     {
 245         gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!",
 246                   gmx::getProgramContext().displayName());
 247     }
 248
 249     if (!(cr->duty & DUTY_PP))
 250     {
 251         /* Our rank is not doing PP, we don't use a GPU */
 252         return;
 253     }
 254
 255     if (gpu_opt->bUserSet)
 256     {
 257         /* Check the GPU IDs passed by the user.
 258          * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
 259          */
 260         std::string errorMessage;
 261         if (!checkGpuSelection(gpu_info, gpu_opt, &errorMessage))
 262         {
 263             const bool canHaveHeterogeneousNodes = GMX_LIB_MPI && PAR(cr);
 264             if (canHaveHeterogeneousNodes)
 265             {
 266                 print_gpu_detection_stats(mdlog, gpu_info);
 267             }
 268             gmx_fatal(FARGS, errorMessage.c_str());
 269         }
 270     }
 271     else if (getenv("GMX_EMULATE_GPU") == nullptr)
 272     {
 273         pickCompatibleGpus(gpu_info, gpu_opt);
 274         assign_rank_gpu_ids(gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
 275     }
 276
 277     /* If the user asked for a GPU, check whether we have a GPU */
 278     if (bForceUseGPU && gpu_info->n_dev_compatible == 0)
 279     {
 280         gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
 281     }
 282 }