src/gromacs/taskassignment/hardwareassign.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "hardwareassign.h"

#include "config.h"

#include <cstring>

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/taskassignment/usergpuids.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/sysinfo.h"
#define HOSTNAMELEN 80
namespace gmx
{
/*! \brief This function is responsible for the automated mapping of
 * GPUs to the processes on a single node.
 *
 * This selects the GPUs we will use. This is an operation local to each physical node.
 * If we have fewer MPI ranks than GPUs, we will waste some GPUs.
 *
 * \param[in] compatibleGpus Vector of GPUs that are compatible.
 * \param[in] nrank          Number of PP GPU ranks on the node.
 * \param[in] rank           Index of PP GPU rank on the node.
 *
 * \returns The assignment of GPU tasks on ranks of this node to GPU devices on this node.
 */
static std::vector<int> assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
                                            int nrank, int rank)
{
    int numCompatibleGpus = static_cast<int>(compatibleGpus.size());
    GMX_RELEASE_ASSERT(nrank >= 1,
                       gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
                                         nrank, numCompatibleGpus).c_str());

    if (numCompatibleGpus == 0)
    {
        char host[HOSTNAMELEN];

        gmx_gethostname(host, HOSTNAMELEN);
        gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
    }

    int nshare = 1;
    if (nrank > numCompatibleGpus)
    {
        if (nrank % numCompatibleGpus == 0)
        {
            nshare = nrank/numCompatibleGpus;
        }
        else
        {
            if (rank == 0)
            {
                gmx_fatal(FARGS, "The number of MPI ranks (%d) in a physical node is not a multiple of the number of GPUs (%d). Select a different number of MPI ranks or use the -gpu_id option to manually specify the GPU to be used.",
                          nrank, numCompatibleGpus);
            }

#if GMX_MPI
            /* We use a global barrier to prevent ranks from continuing with
             * an invalid setup.
             */
            MPI_Barrier(MPI_COMM_WORLD);
#endif
        }
    }

    /* Here we will waste GPUs when nrank < numCompatibleGpus */
    std::vector<int> taskAssignment;
    taskAssignment.resize(std::min(numCompatibleGpus*nshare, nrank));
    for (size_t i = 0; i != taskAssignment.size(); ++i)
    {
        /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
        taskAssignment[i] = compatibleGpus[i/nshare];
    }
    return taskAssignment;
}
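
/* Worked example (hypothetical values, for illustration only): with
 * compatibleGpus = {0, 1} and nrank = 4 PP ranks on the node, nshare
 * becomes 4/2 = 2 and the returned assignment is {0, 0, 1, 1}, i.e.
 * ranks 0 and 1 share GPU 0 while ranks 2 and 3 share GPU 1. With
 * nrank = 3 and the same two GPUs, the run aborts via gmx_fatal,
 * because 3 is not a multiple of 2.
 */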
std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
                                  const t_commrec        *cr,
                                  const gmx_gpu_info_t   &gpu_info,
                                  const std::vector<int> &compatibleGpus,
                                  const std::vector<int> &userGpuIds)
{
    std::vector<int> taskAssignment;

    if (!rankCanUseGpu)
    {
        return taskAssignment;
    }

    if (!userGpuIds.empty())
    {
        checkUserGpuIds(gpu_info, compatibleGpus, userGpuIds);
        taskAssignment = userGpuIds;
    }
    else
    {
        taskAssignment = assign_rank_gpu_ids(compatibleGpus, cr->nrank_pp_intranode, cr->rank_pp_intranode);
    }
    return taskAssignment;
}
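
/* Illustrative use (hypothetical values): a GPU-capable rank with no
 * user-specified IDs,
 *
 *   std::vector<int> assignment =
 *           mapPpRanksToGpus(true, cr, gpu_info, {0, 1}, {});
 *
 * falls through to assign_rank_gpu_ids(), yielding {0, 0, 1, 1} when
 * the node runs 4 PP ranks. A non-empty userGpuIds vector is instead
 * validated by checkUserGpuIds() and returned as-is.
 */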
} // namespace gmx
/*! \brief Return the number of PP rank pairs that share a GPU device between them.
 *
 * Sharing GPUs among multiple PP ranks is possible via either user or
 * automated selection. */
static int gmx_count_gpu_dev_shared(const std::vector<int> &gpuTaskAssignment,
                                    bool                    userSetGpuIds)
{
    int same_count = 0;

    if (userSetGpuIds)
    {
        GMX_RELEASE_ASSERT(!gpuTaskAssignment.empty(),
                           "The user cannot choose an empty set of GPU IDs, code is wrong somewhere");
        size_t ngpu = gpuTaskAssignment.size();

        for (size_t i = 0; i < ngpu - 1; i++)
        {
            for (size_t j = i + 1; j < ngpu; j++)
            {
                same_count += (gpuTaskAssignment[i] ==
                               gpuTaskAssignment[j]);
            }
        }
    }

    return same_count;
}
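
/* Example (hypothetical assignments): for a user-set assignment
 * {0, 0, 1}, exactly one pair of PP ranks shares a device (ranks 0
 * and 1 on GPU 0), so this returns 1; for {0, 0, 0} all three pairs
 * share, so it returns 3. For automated selection
 * (userSetGpuIds == false) it always returns 0.
 */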
/*! \brief Count and return the number of unique GPUs (per node) selected.
 *
 * As sharing GPUs among multiple PP ranks is possible, the number of
 * GPUs used (per node) can be different from the number of GPU IDs
 * used.
 */
static size_t gmx_count_gpu_dev_unique(const std::vector<int> &gpuTaskAssignment)
{
    std::set<int> uniqIds;
    for (const auto &deviceId : gpuTaskAssignment)
    {
        uniqIds.insert(deviceId);
    }
    return uniqIds.size();
}
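
/* For instance, the (hypothetical) assignment {0, 0, 1} contains the
 * two unique device IDs 0 and 1, so this returns 2 even though three
 * GPU tasks were assigned.
 */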
void reportGpuUsage(const gmx::MDLogger    &mdlog,
                    const gmx_gpu_info_t   &gpu_info,
                    bool                    userSetGpuIds,
                    const std::vector<int> &gpuTaskAssignment,
                    size_t                  numPpRanks,
                    bool                    bPrintHostName)
{
    if (gpuTaskAssignment.empty())
    {
        return;
    }

    std::string output;
    {
        std::string gpuIdsString =
            gmx::formatAndJoin(gpuTaskAssignment, ",", gmx::StringFormatter("%d"));
        size_t      numGpusInUse = gmx_count_gpu_dev_unique(gpuTaskAssignment);
        bool        bPluralGpus  = numGpusInUse > 1;

        if (bPrintHostName)
        {
            char host[STRLEN];
            gmx_gethostname(host, STRLEN);
            output += gmx::formatString("On host %s ", host);
        }
        output += gmx::formatString("%zu GPU%s %sselected for this run.\n"
                                    "Mapping of GPU ID%s to the %zu PP rank%s in this node: %s\n",
                                    numGpusInUse, bPluralGpus ? "s" : "",
                                    userSetGpuIds ? "user-" : "auto-",
                                    bPluralGpus ? "s" : "",
                                    numPpRanks,
                                    (numPpRanks > 1) ? "s" : "",
                                    gpuIdsString.c_str());
    }

    int same_count = gmx_count_gpu_dev_shared(gpuTaskAssignment, userSetGpuIds);

    if (same_count > 0)
    {
        output += gmx::formatString("NOTE: You assigned %s to multiple ranks.\n",
                                    same_count > 1 ? "GPU IDs" : "a GPU ID");
    }

    if (static_cast<size_t>(gpu_info.n_dev_compatible) > numPpRanks)
    {
        /* TODO In principle, this warning could be warranted only on
         * ranks on some nodes, but we lack the infrastructure to do a
         * good job of reporting that. */
        output += gmx::formatString("NOTE: potentially sub-optimal launch configuration using fewer\n"
                                    "      PP ranks on a node than GPUs available on that node.\n");
    }

    /* NOTE: this print is only for and on one physical node */
    GMX_LOG(mdlog.warning).appendText(output);
}
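
/* Example of the assembled log text (illustrative values only):
 *
 *   2 GPUs auto-selected for this run.
 *   Mapping of GPU IDs to the 4 PP ranks in this node: 0,0,1,1
 */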