Parse user-supplied GPU task assignment only when needed
[gromacs.git] / src / gromacs / hardware / printhardware.cpp
blob b2aa0a05fcee86f838afc3870b24adb92bc2a4bb
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "printhardware.h"

#include "config.h"

#include <cstdlib>

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/simd/support.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/sysinfo.h"

//! Constant used to help minimize preprocessed code
static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;

/*! \brief Return the number of PP rank pairs that share a GPU device between them.
 *
 * Sharing GPUs among multiple PP ranks is possible via either user or
 * automated selection. */
static int gmx_count_gpu_dev_shared(const std::vector<int> &gpuTaskAssignment,
                                    bool                    userSetGpuIds)
{
    int same_count = 0;

    if (userSetGpuIds)
    {
        GMX_RELEASE_ASSERT(!gpuTaskAssignment.empty(),
                           "The user cannot choose an empty set of GPU IDs, code is wrong somewhere");
        size_t ngpu = gpuTaskAssignment.size();

        for (size_t i = 0; i < ngpu - 1; i++)
        {
            for (size_t j = i + 1; j < ngpu; j++)
            {
                same_count += (gpuTaskAssignment[i] ==
                               gpuTaskAssignment[j]);
            }
        }
    }

    return same_count;
}

/* Count and return the number of unique GPUs (per node) selected.
 *
 * As sharing GPUs among multiple PP ranks is possible, the number of
 * GPUs used (per node) can be different from the number of GPU IDs
 * used.
 */
static size_t gmx_count_gpu_dev_unique(const std::vector<int> &gpuTaskAssignment)
{
    std::set<int> uniqIds;
    for (const auto &deviceId : gpuTaskAssignment)
    {
        uniqIds.insert(deviceId);
    }
    return uniqIds.size();
}

/*! \internal \brief
 * Returns the GPU information text, one GPU per line.
 */
static std::string sprint_gpus(const gmx_gpu_info_t &gpu_info)
{
    char                     stmp[STRLEN];
    std::vector<std::string> gpuStrings;
    for (int i = 0; i < gpu_info.n_dev; i++)
    {
        get_gpu_device_info_string(stmp, gpu_info, i);
        gpuStrings.push_back(gmx::formatString("    %s", stmp));
    }
    return gmx::joinStrings(gpuStrings, "\n");
}

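/* Append a report of the GPU selection for this physical node to the md
 * logger: how many distinct GPUs are in use, whether they were chosen by
 * the user or automatically, how the GPU IDs map onto the PP ranks, and
 * notes about shared or unused compatible devices. Does nothing when the
 * task assignment is empty. */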
void reportGpuUsage(const gmx::MDLogger    &mdlog,
                    const gmx_gpu_info_t   &gpu_info,
                    bool                    userSetGpuIds,
                    const std::vector<int> &gpuTaskAssignment,
                    size_t                  numPpRanks,
                    bool                    bPrintHostName)
{
    if (gpuTaskAssignment.empty())
    {
        return;
    }

    std::string output;

    std::string gpuIdsString =
        formatAndJoin(gpuTaskAssignment, ",", gmx::StringFormatter("%d"));
    size_t      numGpusInUse = gmx_count_gpu_dev_unique(gpuTaskAssignment);
    bool        bPluralGpus  = numGpusInUse > 1;

    if (bPrintHostName)
    {
        char host[STRLEN];
        gmx_gethostname(host, STRLEN);
        output += gmx::formatString("On host %s ", host);
    }
    output += gmx::formatString("%zu GPU%s %sselected for this run.\n"
                                "Mapping of GPU ID%s to the %zu PP rank%s in this node: %s\n",
                                numGpusInUse, bPluralGpus ? "s" : "",
                                userSetGpuIds ? "user-" : "auto-",
                                bPluralGpus ? "s" : "",
                                numPpRanks,
                                (numPpRanks > 1) ? "s" : "",
                                gpuIdsString.c_str());

    int same_count = gmx_count_gpu_dev_shared(gpuTaskAssignment, userSetGpuIds);

    if (same_count > 0)
    {
        output += gmx::formatString("NOTE: You assigned %s to multiple ranks.\n",
                                    same_count > 1 ? "GPU IDs" : "a GPU ID");
    }

    if (static_cast<size_t>(gpu_info.n_dev_compatible) > numPpRanks)
    {
        /* TODO In principle, this warning could be warranted only on
         * ranks on some nodes, but we lack the infrastructure to do a
         * good job of reporting that. */
        output += gmx::formatString("NOTE: potentially sub-optimal launch configuration using fewer\n"
                                    "      PP ranks on a node than GPUs available on that node.\n");
    }

    /* NOTE: this print is only for and on one physical node */
    GMX_LOG(mdlog.warning).appendText(output);
}

/* Give a suitable fatal error or warning if the build configuration
   and runtime CPU do not match. */
static void
check_use_of_rdtscp_on_this_cpu(const gmx::MDLogger &mdlog,
                                const gmx::CpuInfo  &cpuInfo)
{
    bool        binaryUsesRdtscp = HAVE_RDTSCP;

    const char *programName = gmx::getProgramContext().displayName();

    if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
    {
        if (binaryUsesRdtscp)
        {
            GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
                    "The %s executable was compiled to use the rdtscp CPU instruction. "
                    "We cannot detect the features of your current CPU, but will proceed anyway. "
                    "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
                    programName);
        }
    }
    else
    {
        bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);

        if (!cpuHasRdtscp && binaryUsesRdtscp)
        {
            gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
                      "However, this is not supported by the current hardware and continuing would lead to a crash. "
                      "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
                      programName);
        }

        if (cpuHasRdtscp && !binaryUsesRdtscp)
        {
            GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
                    "The current CPU can measure timings more accurately than the code in\n"
                    "%s was configured to use. This might affect your simulation\n"
                    "speed as accurate timings are needed for load-balancing.\n"
                    "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.",
                    programName, programName);
        }
    }
}

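/* Build the multi-line hardware report: totals over all physical nodes
 * (nodes, cores, logical cores, compatible GPUs), per-node ranges when
 * running on more than one node, and the CPU, SIMD, hardware-topology and
 * GPU details of this node. With bFullCpuInfo, CPU features and the full
 * topology (sockets, NUMA, caches, PCI devices) are also listed. */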
static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
                                            bool                 bFullCpuInfo)
{
    std::string                  s;

    const gmx::CpuInfo          &cpuInfo = *hwinfo->cpuInfo;
    const gmx::HardwareTopology &hwTop   = *hwinfo->hardwareTopology;

    s  = gmx::formatString("\n");
    s += gmx::formatString("Running on %d node%s with total",
                           hwinfo->nphysicalnode,
                           hwinfo->nphysicalnode == 1 ? "" : "s");
    if (hwinfo->ncore_tot > 0)
    {
        s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
    }
    s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
    if (hwinfo->gpu_info.bDetectGPUs)
    {
        s += gmx::formatString(", %d compatible GPU%s",
                               hwinfo->ngpu_compatible_tot,
                               hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
    }
    else if (bGPUBinary)
    {
        s += gmx::formatString(" (GPU detection deactivated)");
    }
    s += gmx::formatString("\n");

    if (hwinfo->nphysicalnode > 1)
    {
        /* Print per node hardware feature counts */
        if (hwinfo->ncore_max > 0)
        {
            s += gmx::formatString("  Cores per node:           %2d", hwinfo->ncore_min);
            if (hwinfo->ncore_max > hwinfo->ncore_min)
            {
                s += gmx::formatString(" - %2d", hwinfo->ncore_max);
            }
            s += gmx::formatString("\n");
        }
        s += gmx::formatString("  Logical cores per node:   %2d", hwinfo->nhwthread_min);
        if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
        {
            s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
        }
        s += gmx::formatString("\n");
        if (bGPUBinary)
        {
            s += gmx::formatString("  Compatible GPUs per node: %2d",
                                   hwinfo->ngpu_compatible_min);
            if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
            {
                s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
            }
            s += gmx::formatString("\n");
            if (hwinfo->ngpu_compatible_tot > 0)
            {
                if (hwinfo->bIdenticalGPUs)
                {
                    s += gmx::formatString("  All nodes have identical type(s) of GPUs\n");
                }
                else
                {
                    /* This message will also appear with identical GPU types
                     * when at least one node has no GPU.
                     */
                    s += gmx::formatString("  Different nodes have different type(s) and/or order of GPUs\n");
                }
            }
        }
    }

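    /* From here on the report describes this physical node only: host name
     * (in an MPI build), CPU, SIMD, topology and GPU details. */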
#if GMX_LIB_MPI
    int  rank;
    char host[STRLEN];

    gmx_gethostname(host, STRLEN);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // TODO Use a wrapper around MPI_Get_processor_name instead.
    s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
                           host, rank);
#else
    s += gmx::formatString("Hardware detected:\n");
#endif
    s += gmx::formatString("  CPU info:\n");

    s += gmx::formatString("    Vendor: %s\n", cpuInfo.vendorString().c_str());

    s += gmx::formatString("    Brand:  %s\n", cpuInfo.brandString().c_str());

    if (bFullCpuInfo)
    {
        s += gmx::formatString("    Family: %d   Model: %d   Stepping: %d\n",
                               cpuInfo.family(), cpuInfo.model(), cpuInfo.stepping());

        s += gmx::formatString("    Features:");
        for (auto &f : cpuInfo.featureSet())
        {
            s += gmx::formatString(" %s", cpuInfo.featureString(f).c_str());
        }
        s += gmx::formatString("\n");
    }

    s += gmx::formatString("    SIMD instructions most likely to fit this hardware: %s",
                           gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min)).c_str());

    if (hwinfo->simd_suggest_max > hwinfo->simd_suggest_min)
    {
        s += gmx::formatString(" - %s", gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_max)).c_str());
    }
    s += gmx::formatString("\n");

    s += gmx::formatString("    SIMD instructions selected at GROMACS compile time: %s\n",
                           gmx::simdString(gmx::simdCompiled()).c_str());

    s += gmx::formatString("\n");

    s += gmx::formatString("  Hardware topology: ");
    switch (hwTop.supportLevel())
    {
        case gmx::HardwareTopology::SupportLevel::None:
            s += gmx::formatString("None\n");
            break;
        case gmx::HardwareTopology::SupportLevel::LogicalProcessorCount:
            s += gmx::formatString("Only logical processor count\n");
            break;
        case gmx::HardwareTopology::SupportLevel::Basic:
            s += gmx::formatString("Basic\n");
            break;
        case gmx::HardwareTopology::SupportLevel::Full:
            s += gmx::formatString("Full\n");
            break;
        case gmx::HardwareTopology::SupportLevel::FullWithDevices:
            s += gmx::formatString("Full, with devices\n");
            break;
    }

    if (!hwTop.isThisSystem())
    {
        s += gmx::formatString("  NOTE: Hardware topology cached or synthetic, not detected.\n");
        if (char *p = std::getenv("HWLOC_XMLFILE"))
        {
            s += gmx::formatString("        HWLOC_XMLFILE=%s\n", p);
        }
    }

    if (bFullCpuInfo)
    {
        if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic)
        {
            s += gmx::formatString("    Sockets, cores, and logical processors:\n");

            for (auto &socket : hwTop.machine().sockets)
            {
                s += gmx::formatString("      Socket %2d:", socket.id);
                for (auto &c : socket.cores)
                {
                    s += gmx::formatString(" [");
                    for (auto &t : c.hwThreads)
                    {
                        s += gmx::formatString(" %3d", t.logicalProcessorId);
                    }
                    s += gmx::formatString("]");
                }
                s += gmx::formatString("\n");
            }
        }
        if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Full)
        {
            s += gmx::formatString("    Numa nodes:\n");
            for (auto &n : hwTop.machine().numa.nodes)
            {
                s += gmx::formatString("      Node %2d (%" GMX_PRIu64 " bytes mem):", n.id, n.memory);
                for (auto &l : n.logicalProcessorId)
                {
                    s += gmx::formatString(" %3d", l);
                }
                s += gmx::formatString("\n");
            }
            s += gmx::formatString("      Latency:\n      ");
            for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
            {
                s += gmx::formatString(" %5zu", j);
            }
            s += gmx::formatString("\n");
            for (std::size_t i = 0; i < hwTop.machine().numa.nodes.size(); i++)
            {
                s += gmx::formatString(" %5zu", i);
                for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
                {
                    s += gmx::formatString(" %5.2f", hwTop.machine().numa.relativeLatency[i][j]);
                }
                s += gmx::formatString("\n");
            }

            s += gmx::formatString("    Caches:\n");
            for (auto &c : hwTop.machine().caches)
            {
                s += gmx::formatString("      L%d: %" GMX_PRIu64 " bytes, linesize %d bytes, assoc. %d, shared %d ways\n",
                                       c.level, c.size, c.linesize, c.associativity, c.shared);
            }
        }
        if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::FullWithDevices)
        {
            s += gmx::formatString("    PCI devices:\n");
            for (auto &d : hwTop.machine().devices)
            {
                s += gmx::formatString("      %04x:%02x:%02x.%1x  Id: %04x:%04x  Class: 0x%04x  Numa: %d\n",
                                       d.domain, d.bus, d.dev, d.func, d.vendorId, d.deviceId, d.classId, d.numaNodeId);
            }
        }
    }

    if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
                       hwinfo->gpu_info.n_dev > 0))
    {
        s += gmx::formatString("  GPU info:\n");
        s += gmx::formatString("    Number of GPUs detected: %d\n",
                               hwinfo->gpu_info.n_dev);
        if (hwinfo->gpu_info.n_dev > 0)
        {
            s += sprint_gpus(hwinfo->gpu_info) + "\n";
        }
    }
    return s;
}

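/* Print the detected hardware: the full report (including CPU features and
 * topology) to the log file, and a shorter version to stderr on the master
 * rank(s); then check the compiled SIMD level and RDTSCP usage against what
 * this node supports. */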
void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
                                 const gmx::MDLogger &mdlog,
                                 const gmx_hw_info_t *hwinfo)
{
    const gmx::CpuInfo &cpuInfo = *hwinfo->cpuInfo;

    if (fplog != nullptr)
    {
        std::string detected;

        detected = detected_hardware_string(hwinfo, TRUE);
        fprintf(fplog, "%s\n", detected.c_str());
    }

    if (MULTIMASTER(cr))
    {
        std::string detected;

        detected = detected_hardware_string(hwinfo, FALSE);
        fprintf(stderr, "%s\n", detected.c_str());
    }

    /* Check the compiled SIMD instruction set against that of the node
     * with the lowest SIMD level support (skip if SIMD detection did not work)
     */
    if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
    {
        gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, MULTIMASTER(cr));
    }

    /* For RDTSCP we only check on our local node and skip the MPI reduction */
    check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);
}