src/gromacs/hardware/hardwareassign.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "hardwareassign.h"
  38
  39 #include "config.h"
  40
  41 #include <cstring>
  42
  43 #include <algorithm>
  44 #include <string>
  45
  46 #include "gromacs/gmxlib/network.h"
  47 #include "gromacs/gpu_utils/gpu_utils.h"
  48 #include "gromacs/hardware/detecthardware.h"
  49 #include "gromacs/hardware/gpu_hw_info.h"
  50 #include "gromacs/mdtypes/commrec.h"
  51 #include "gromacs/utility/basenetwork.h"
  52 #include "gromacs/utility/exceptions.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/gmxassert.h"
  55 #include "gromacs/utility/logger.h"
  56 #include "gromacs/utility/smalloc.h"
  57 #include "gromacs/utility/stringutil.h"
  58 #include "gromacs/utility/sysinfo.h"
  59
  60 #define HOSTNAMELEN 80
  61
  62 /*! \internal \brief
  63  * Prints GPU information strings on this node into the stderr and log.
  64  * Only used for logging errors in heterogenous MPI configurations.
  65  */
  66 static void print_gpu_detection_stats(const gmx::MDLogger  &mdlog,
  67                                       const gmx_gpu_info_t *gpu_info)
  68 {
  69     char onhost[HOSTNAMELEN+10];
  70     int  ngpu;
  71
  72     if (!gpu_info->bDetectGPUs)
  73     {
  74         /* We skipped the detection, so don't print detection stats */
  75         return;
  76     }
  77
  78     ngpu = gpu_info->n_dev;
  79
  80     /* We only print the detection on one, of possibly multiple, nodes */
  81     std::strncpy(onhost, " on host ", 10);
  82     gmx_gethostname(onhost + 9, HOSTNAMELEN);
  83
  84     if (ngpu > 0)
  85     {
  86         std::string gpuDesc = sprint_gpus(gpu_info);
  87         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
  88                 "%d GPU%s detected%s:\n%s",
  89                 ngpu, (ngpu > 1) ? "s" : "", onhost, gpuDesc.c_str());
  90     }
  91     else
  92     {
  93         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("No GPUs detected%s", onhost);
  94     }
  95     // FIXME: This currently only logs on the master rank, which defeats the purpose.
  96     // A new MDLogger option is required for printing to stderr on all ranks.
  97     // There is also a question of MPI reduction of the outputs, see Redmine issue #1505.
  98 }
  99
 100 /*! \internal \brief
 101  * This function is responsible for mapping the GPUs to the processes on a single node
 102  * (filling the gpu_opt->dev_use array).
 103  *
 104  * \param[in]        compatibleGpus       Vector of GPUs that are compatible
 105  * \param[in,out]    gpu_opt              Input/output GPU assignment data.
 106  * \param[in]        nrank                Number of PP GPU ranks on the node.
 107  * \param[in]        rank                 Index of PP GPU rank on the node.
 108  *
 109  * This selects the GPUs we will use. This is an operation local to each physical node.
 110  * If we have less MPI ranks than GPUs, we will waste some GPUs.
 111  */
 112 static void assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
 113                                 gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
 114 {
 115     int numCompatibleGpus = static_cast<int>(compatibleGpus.size());
 116     GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
 117     GMX_RELEASE_ASSERT(nrank >= 1,
 118                        gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
 119                                          rank, numCompatibleGpus).c_str());
 120
 121     if (numCompatibleGpus == 0)
 122     {
 123         char host[HOSTNAMELEN];
 124
 125         gmx_gethostname(host, HOSTNAMELEN);
 126         gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
 127     }
 128
 129     int nshare;
 130
 131     nshare = 1;
 132     if (nrank > numCompatibleGpus)
 133     {
 134         if (nrank % numCompatibleGpus == 0)
 135         {
 136             nshare = gmx_gpu_sharing_supported() ? nrank/numCompatibleGpus : 1;
 137         }
 138         else
 139         {
 140             if (rank == 0)
 141             {
 142                 gmx_fatal(FARGS, "The number of MPI ranks (%d) in a physical node is not a multiple of the number of GPUs (%d). Select a different number of MPI ranks or use the -gpu_id option to manually specify the GPU to be used.",
 143                           nrank, numCompatibleGpus);
 144             }
 145
 146 #if GMX_MPI
 147             /* We use a global barrier to prevent ranks from continuing with
 148              * an invalid setup.
 149              */
 150             MPI_Barrier(MPI_COMM_WORLD);
 151 #endif
 152         }
 153     }
 154
 155     /* Here we will waste GPUs when nrank < numCompatibleGpus */
 156     gpu_opt->n_dev_use = std::min(numCompatibleGpus*nshare, nrank);
 157     if (!gmx_multiple_gpu_per_node_supported())
 158     {
 159         gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
 160     }
 161     snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
 162     for (int i = 0; i != gpu_opt->n_dev_use; ++i)
 163     {
 164         /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
 165         gpu_opt->dev_use[i] = compatibleGpus[i/nshare];
 166     }
 167 }
 168
 169 /*! \brief Return whether all selected GPUs are compatible.
 170  *
 171  * Given the list of selected GPU device IDs in \c gpu_opt and
 172  * detected GPUs in \c gpu_info, return whether all selected GPUs are
 173  * compatible. If not, place a suitable string in \c errorMessage.
 174  *
 175  * \param[in]   gpu_info      pointer to structure holding GPU information
 176  * \param[in]   gpu_opt       pointer to structure holding GPU options
 177  * \param[out]  errorMessage  pointer to string to hold a possible error message (is not updated when returning true)
 178  * \returns                   true if every requested GPU is compatible
 179  */
 180 static bool checkGpuSelection(const gmx_gpu_info_t *gpu_info,
 181                               const gmx_gpu_opt_t  *gpu_opt,
 182                               std::string          *errorMessage)
 183 {
 184     GMX_ASSERT(gpu_info, "Invalid gpu_info");
 185
 186     bool        allOK   = true;
 187     std::string message = "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
 188     for (int i = 0; i < gpu_opt->n_dev_use; i++)
 189     {
 190         GMX_ASSERT(gpu_opt, "Invalid gpu_opt");
 191         GMX_ASSERT(gpu_opt->dev_use, "Invalid gpu_opt->dev_use");
 192
 193         int id     = gpu_opt->dev_use[i];
 194         if (!isGpuCompatible(gpu_info, id))
 195         {
 196             allOK    = false;
 197             message += gmx::formatString("    GPU #%d: %s\n",
 198                                          id,
 199                                          getGpuCompatibilityDescription(gpu_info, id));
 200         }
 201     }
 202     if (!allOK && errorMessage)
 203     {
 204         *errorMessage = message;
 205     }
 206     return allOK;
 207 }
 208
 209 std::vector<int> getCompatibleGpus(const gmx_gpu_info_t *gpu_info)
 210 {
 211     GMX_ASSERT(gpu_info, "Invalid gpu_info");
 212
 213     // Possible minor over-allocation here, but not important for anything
 214     std::vector<int> compatibleGpus;
 215     compatibleGpus.reserve(gpu_info->n_dev);
 216     for (int i = 0; i < gpu_info->n_dev; i++)
 217     {
 218         if (isGpuCompatible(gpu_info, i))
 219         {
 220             compatibleGpus.push_back(i);
 221         }
 222     }
 223     return compatibleGpus;
 224 }
 225
 226 void gmx_select_rank_gpu_ids(const gmx::MDLogger &mdlog, const t_commrec *cr,
 227                              const gmx_gpu_info_t *gpu_info,
 228                              bool userSetGpuIds,
 229                              gmx_gpu_opt_t *gpu_opt)
 230 {
 231     if (!(cr->duty & DUTY_PP))
 232     {
 233         /* Our rank is not doing PP, we don't use a GPU */
 234         return;
 235     }
 236
 237     if (userSetGpuIds)
 238     {
 239         /* Check the GPU IDs passed by the user.
 240          * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
 241          */
 242         std::string errorMessage;
 243         if (!checkGpuSelection(gpu_info, gpu_opt, &errorMessage))
 244         {
 245             const bool canHaveHeterogeneousNodes = GMX_LIB_MPI && PAR(cr);
 246             if (canHaveHeterogeneousNodes)
 247             {
 248                 print_gpu_detection_stats(mdlog, gpu_info);
 249             }
 250             gmx_fatal(FARGS, errorMessage.c_str());
 251         }
 252     }
 253     else
 254     {
 255         auto compatibleGpus = getCompatibleGpus(gpu_info);
 256         assign_rank_gpu_ids(compatibleGpus, gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
 257     }
 258 }