Move gpu_utils etc. out of gmxlib
[gromacs.git] / src / gromacs / gmxlib / gmx_detect_hardware.cpp
blobf4edd0eabbd9a7de45c10fe2bb87b697f6af9dc1
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include "gmx_detect_hardware.h"
39 #include "config.h"
41 #include <cerrno>
42 #include <cstdlib>
43 #include <cstring>
45 #include <algorithm>
46 #include <string>
47 #include <vector>
49 #ifdef HAVE_UNISTD_H
50 /* For sysconf */
51 #include <unistd.h>
52 #endif
53 #ifdef GMX_NATIVE_WINDOWS
54 #include <windows.h>
55 #endif
57 #include "thread_mpi/threads.h"
59 #include "gromacs/gmxlib/md_logging.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gpu_utils/gpu_utils.h"
62 #include "gromacs/hardware/cpuinfo.h"
63 #include "gromacs/hardware/gpu_hw_info.h"
64 #include "gromacs/hardware/hardwaretopology.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/md_enums.h"
68 #include "gromacs/simd/support.h"
69 #include "gromacs/utility/arrayref.h"
70 #include "gromacs/utility/basedefinitions.h"
71 #include "gromacs/utility/basenetwork.h"
72 #include "gromacs/utility/baseversion.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/gmxassert.h"
77 #include "gromacs/utility/gmxomp.h"
78 #include "gromacs/utility/programcontext.h"
79 #include "gromacs/utility/smalloc.h"
80 #include "gromacs/utility/stringutil.h"
81 #include "gromacs/utility/sysinfo.h"
/* Compile-time capability flags and globally shared detection state.
 * NOTE(review): the arrays below are indexed by GMX_GPU and must stay in
 * sync with the GPU support enumeration in src/config.h.cmakein. */
83 static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;
85 /* Note that some of the following arrays must match the "GPU support
86 * enumeration" in src/config.h.cmakein, so that GMX_GPU looks up an
87 * array entry. */
89 /* CUDA supports everything. Our current OpenCL implementation only
90 * supports using exactly one GPU per PP rank, so sharing is
91 * impossible */
92 static const bool gpuSharingSupport[] = { false, true, false };
93 static const bool bGpuSharingSupported = gpuSharingSupport[GMX_GPU];
95 /* CUDA supports everything. Our current OpenCL implementation seems
96 * to handle concurrency correctly with thread-MPI. The AMD OpenCL
97 * runtime does not seem to support creating a context from more than
98 * one real MPI rank on the same node (it segfaults when you try).
100 static const bool multiGpuSupport[] = {
101 false, true,
102 #ifdef GMX_THREAD_MPI
103 true,
104 #else
105 false, /* Real MPI and no MPI */
106 #endif
108 static const bool bMultiGpuPerNodeSupported = multiGpuSupport[GMX_GPU];
110 /* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
111 const char * const gpu_detect_res_str[egpuNR] =
113 "compatible", "inexistent", "incompatible", "insane"
116 static const char * invalid_gpuid_hint =
117 "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
119 /* The globally shared hwinfo structure. */
120 static gmx_hw_info_t *hwinfo_g;
/* hwinfo_g is reference-counted (n_hwinfo) and all access to it is
 * serialized through hw_info_lock; see gmx_detect_hardware(). */
121 /* A reference counter for the hwinfo structure */
122 static int n_hwinfo = 0;
123 /* A lock to protect the hwinfo structure */
124 static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
126 #define HOSTNAMELEN 80
128 /* FW decl. */
129 static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank);
130 static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
131 const gmx_gpu_opt_t *gpu_opt);
133 gmx_bool gmx_multiple_gpu_per_node_supported()
135 return bMultiGpuPerNodeSupported;
138 gmx_bool gmx_gpu_sharing_supported()
140 return bGpuSharingSupported;
143 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
145 int i, ndev;
146 char stmp[STRLEN];
148 ndev = gpu_info->n_dev;
150 sbuf[0] = '\0';
151 for (i = 0; i < ndev; i++)
153 get_gpu_device_info_string(stmp, gpu_info, i);
154 strcat(sbuf, " ");
155 strcat(sbuf, stmp);
156 if (i < ndev - 1)
158 strcat(sbuf, "\n");
163 static void print_gpu_detection_stats(FILE *fplog,
164 const gmx_gpu_info_t *gpu_info,
165 const t_commrec *cr)
167 char onhost[HOSTNAMELEN+10], stmp[STRLEN];
168 int ngpu;
170 if (!gpu_info->bDetectGPUs)
172 /* We skipped the detection, so don't print detection stats */
173 return;
176 ngpu = gpu_info->n_dev;
178 #if defined GMX_MPI && !defined GMX_THREAD_MPI
179 /* We only print the detection on one, of possibly multiple, nodes */
180 std::strncpy(onhost, " on host ", 10);
181 gmx_gethostname(onhost + 9, HOSTNAMELEN);
182 #else
183 /* We detect all relevant GPUs */
184 std::strncpy(onhost, "", 1);
185 #endif
187 if (ngpu > 0)
189 sprint_gpus(stmp, gpu_info);
190 md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
191 ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
193 else
195 md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
199 /*! \brief Helper function for reporting GPU usage information
200 * in the mdrun log file
202 * \param[in] gpu_info Pointer to per-node GPU info struct
203 * \param[in] gpu_opt Pointer to per-node GPU options struct
204 * \param[in] numPpRanks Number of PP ranks per node
205 * \param[in] bPrintHostName Print the hostname in the usage information
206 * \return String to write to the log file
207 * \throws std::bad_alloc if out of memory */
208 static std::string
209 makeGpuUsageReport(const gmx_gpu_info_t *gpu_info,
210 const gmx_gpu_opt_t *gpu_opt,
211 size_t numPpRanks,
212 bool bPrintHostName)
214 int ngpu_use = gpu_opt->n_dev_use;
215 int ngpu_comp = gpu_info->n_dev_compatible;
216 char host[HOSTNAMELEN];
218 if (bPrintHostName)
220 gmx_gethostname(host, HOSTNAMELEN);
223 /* Issue a note if GPUs are available but not used */
224 if (ngpu_comp > 0 && ngpu_use < 1)
226 return gmx::formatString("%d compatible GPU%s detected in the system, but none will be used.\n"
227 "Consider trying GPU acceleration with the Verlet scheme!\n",
228 ngpu_comp, (ngpu_comp > 1) ? "s" : "");
231 std::string output;
232 if (!gpu_opt->bUserSet)
234 // gpu_opt->dev_compatible is only populated during auto-selection
235 std::string gpuIdsString =
236 formatAndJoin(gmx::constArrayRefFromArray(gpu_opt->dev_compatible,
237 gpu_opt->n_dev_compatible),
238 ",", gmx::StringFormatter("%d"));
239 bool bPluralGpus = gpu_opt->n_dev_compatible > 1;
241 if (bPrintHostName)
243 output += gmx::formatString("On host %s ", host);
245 output += gmx::formatString("%d compatible GPU%s %s present, with ID%s %s\n",
246 gpu_opt->n_dev_compatible,
247 bPluralGpus ? "s" : "",
248 bPluralGpus ? "are" : "is",
249 bPluralGpus ? "s" : "",
250 gpuIdsString.c_str());
254 std::vector<int> gpuIdsInUse;
255 for (int i = 0; i < ngpu_use; i++)
257 gpuIdsInUse.push_back(get_gpu_device_id(gpu_info, gpu_opt, i));
259 std::string gpuIdsString =
260 formatAndJoin(gpuIdsInUse, ",", gmx::StringFormatter("%d"));
261 int numGpusInUse = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
262 bool bPluralGpus = numGpusInUse > 1;
264 if (bPrintHostName)
266 output += gmx::formatString("On host %s ", host);
268 output += gmx::formatString("%d GPU%s %sselected for this run.\n"
269 "Mapping of GPU ID%s to the %d PP rank%s in this node: %s\n",
270 numGpusInUse, bPluralGpus ? "s" : "",
271 gpu_opt->bUserSet ? "user-" : "auto-",
272 bPluralGpus ? "s" : "",
/* NOTE(review): numPpRanks is size_t but is formatted with %d above --
 * a varargs type mismatch on LP64 platforms; presumably should be cast
 * to int or formatted with %zu. TODO confirm and fix. */
273 numPpRanks,
274 (numPpRanks > 1) ? "s" : "",
275 gpuIdsString.c_str());
278 return output;
281 /* Give a suitable fatal error or warning if the build configuration
282 and runtime CPU do not match. */
283 static void
284 check_use_of_rdtscp_on_this_cpu(FILE *fplog,
285 const t_commrec *cr,
286 const gmx::CpuInfo &cpuInfo)
288 #ifdef HAVE_RDTSCP
289 bool binaryUsesRdtscp = TRUE;
290 #else
291 bool binaryUsesRdtscp = FALSE;
292 #endif
294 const char *programName = gmx::getProgramContext().displayName();
296 if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
298 if (binaryUsesRdtscp)
300 md_print_warn(cr, fplog, "The %s executable was compiled to use the rdtscp CPU instruction. "
301 "We cannot detect the features of your current CPU, but will proceed anyway. "
302 "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
303 programName);
306 else
308 bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);
310 if (!cpuHasRdtscp && binaryUsesRdtscp)
312 gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
313 "However, this is not supported by the current hardware and continuing would lead to a crash. "
314 "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
315 programName);
318 if (cpuHasRdtscp && !binaryUsesRdtscp)
320 md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n"
321 "%s was configured to use. This might affect your simulation\n"
322 "speed as accurate timings are needed for load-balancing.\n"
323 "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.\n",
324 programName, programName);
/* Verify that the requested run configuration (MPI flavour, rank counts,
 * GPU assignment) is consistent with the detected hardware, emitting
 * notes, warnings, or fatal errors as appropriate. PME-only ranks are
 * exempt from these PP/GPU checks. */
329 void gmx_check_hw_runconf_consistency(FILE *fplog,
330 const gmx_hw_info_t *hwinfo,
331 const t_commrec *cr,
332 const gmx_hw_opt_t *hw_opt,
333 gmx_bool bUseGPU)
335 int npppn;
336 char th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
337 gmx_bool btMPI, bMPI, bNthreadsAuto, bEmulateGPU;
339 GMX_RELEASE_ASSERT(hwinfo, "hwinfo must be a non-NULL pointer");
340 GMX_RELEASE_ASSERT(cr, "cr must be a non-NULL pointer");
342 /* Below we only do consistency checks for PP and GPUs,
343 * this is irrelevant for PME only nodes, so in that case we return
344 * here.
346 if (!(cr->duty & DUTY_PP))
348 return;
/* Classify the MPI flavour; with thread-MPI the thread count may still
 * be auto-determined (nthreads_tmpi < 1). */
351 #if defined(GMX_THREAD_MPI)
352 bMPI = FALSE;
353 btMPI = TRUE;
354 bNthreadsAuto = (hw_opt->nthreads_tmpi < 1);
355 #elif defined(GMX_LIB_MPI)
356 bMPI = TRUE;
357 btMPI = FALSE;
358 bNthreadsAuto = FALSE;
359 #else
360 bMPI = FALSE;
361 btMPI = FALSE;
362 bNthreadsAuto = FALSE;
363 #endif
365 /* GPU emulation detection is done later, but we need here as well
366 * -- uncool, but there's no elegant workaround */
367 bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
369 if (hwinfo->gpu_info.n_dev_compatible > 0)
371 std::string gpuUseageReport;
374 gpuUseageReport = makeGpuUsageReport(&hwinfo->gpu_info,
375 &hw_opt->gpu_opt,
376 cr->nrank_pp_intranode,
377 bMPI && cr->nnodes > 1);
379 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
381 /* NOTE: this print is only for and on one physical node */
382 md_print_info(cr, fplog, "%s\n", gpuUseageReport.c_str());
385 /* Need to ensure that we have enough GPUs:
386 * - need one GPU per PP node
387 * - no GPU oversubscription with tMPI
388 * */
389 /* number of PP processes per node */
390 npppn = cr->nrank_pp_intranode;
/* Build grammatical fragments ("thread-MPI thread(s)", "MPI process(es)",
 * "process") reused in all messages below. */
392 pernode[0] = '\0';
393 th_or_proc_plural[0] = '\0';
394 if (btMPI)
396 sprintf(th_or_proc, "thread-MPI thread");
397 if (npppn > 1)
399 sprintf(th_or_proc_plural, "s");
402 else if (bMPI)
404 sprintf(th_or_proc, "MPI process");
405 if (npppn > 1)
407 sprintf(th_or_proc_plural, "es");
409 sprintf(pernode, " per node");
411 else
413 /* neither MPI nor tMPI */
414 sprintf(th_or_proc, "process");
417 if (bUseGPU && hwinfo->gpu_info.n_dev_compatible > 0 &&
418 !bEmulateGPU)
420 int ngpu_comp, ngpu_use;
421 char gpu_comp_plural[2], gpu_use_plural[2];
423 ngpu_comp = hwinfo->gpu_info.n_dev_compatible;
424 ngpu_use = hw_opt->gpu_opt.n_dev_use;
426 sprintf(gpu_comp_plural, "%s", (ngpu_comp > 1) ? "s" : "");
427 sprintf(gpu_use_plural, "%s", (ngpu_use > 1) ? "s" : "");
429 const char *programName = gmx::getProgramContext().displayName();
431 /* number of tMPI threads auto-adjusted */
432 if (btMPI && bNthreadsAuto)
434 if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use)
436 /* The user manually provided more GPUs than threads we
437 could automatically start. */
/* NOTE(review): the message below contains the typos "coud" and
 * "tread-MPI" (should be "could" / "thread-MPI") -- user-facing
 * strings, fix in a code change, not here. */
438 gmx_fatal(FARGS,
439 "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
440 "%s requires one PP tread-MPI thread per GPU; use fewer GPUs.",
441 ngpu_use, gpu_use_plural,
442 npppn, th_or_proc_plural,
443 programName);
446 if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp)
448 /* There are more GPUs than tMPI threads; we have
449 limited the number GPUs used. */
450 md_print_warn(cr, fplog,
451 "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
452 " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.\n",
453 ngpu_comp, gpu_comp_plural,
454 npppn, th_or_proc_plural,
455 programName, npppn,
456 npppn > 1 ? "s" : "");
460 if (hw_opt->gpu_opt.bUserSet)
462 if (ngpu_use != npppn)
464 gmx_fatal(FARGS,
465 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
466 "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
467 th_or_proc, btMPI ? "s" : "es", pernode,
468 programName, npppn, th_or_proc,
469 th_or_proc_plural, pernode,
470 ngpu_use, gpu_use_plural);
473 else
475 /* TODO Should we have a gpu_opt->n_dev_supported field? */
476 if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
478 md_print_warn(cr, fplog,
479 "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
480 " PP %s%s%s than GPU%s available.\n"
481 " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
482 programName, th_or_proc,
483 th_or_proc_plural, pernode, gpu_comp_plural,
484 th_or_proc, npppn, gpu_use_plural, pernode);
487 if (ngpu_use != npppn)
489 /* Avoid duplicate error messages.
490 * Unfortunately we can only do this at the physical node
491 * level, since the hardware setup and MPI process count
492 * might differ between physical nodes.
494 if (cr->rank_pp_intranode == 0)
496 std::string reasonForLimit;
497 if (ngpu_comp > 1 &&
498 ngpu_use == 1 &&
499 !gmx_multiple_gpu_per_node_supported())
501 reasonForLimit = "can be used by ";
502 reasonForLimit += getGpuImplementationString();
503 reasonForLimit += " in GROMACS";
505 else
507 reasonForLimit = "was detected";
509 gmx_fatal(FARGS,
510 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
511 "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
512 th_or_proc, btMPI ? "s" : "es", pernode,
513 programName, npppn, th_or_proc,
514 th_or_proc_plural, pernode,
515 ngpu_use, gpu_use_plural, reasonForLimit.c_str());
/* Note any GPU IDs shared by several PP ranks (only possible with
 * user-set IDs); informational, not an error. */
521 int same_count;
523 same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt);
525 if (same_count > 0)
527 md_print_info(cr, fplog,
528 "NOTE: You assigned %s to multiple %s%s.\n",
529 same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
534 #ifdef GMX_MPI
535 if (PAR(cr))
537 /* Avoid other ranks to continue after
538 inconsistency */
539 MPI_Barrier(cr->mpi_comm_mygroup);
541 #endif
545 /* Return 0 if none of the GPU (per node) are shared among PP ranks.
547 * Sharing GPUs among multiple PP ranks is possible when the user passes
548 * GPU IDs. Here we check for sharing and return a non-zero value when
549 * this is detected. Note that the return value represents the number of
550 * PP rank pairs that share a device.
552 int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
554 int same_count = 0;
555 int ngpu = gpu_opt->n_dev_use;
557 if (gpu_opt->bUserSet)
559 int i, j;
561 for (i = 0; i < ngpu - 1; i++)
563 for (j = i + 1; j < ngpu; j++)
565 same_count += (gpu_opt->dev_use[i] ==
566 gpu_opt->dev_use[j]);
571 return same_count;
574 /* Count and return the number of unique GPUs (per node) selected.
576 * As sharing GPUs among multiple PP ranks is possible when the user passes
577 * GPU IDs, the number of GPUs user (per node) can be different from the
578 * number of GPU IDs selected.
580 static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
581 const gmx_gpu_opt_t *gpu_opt)
583 int i, uniq_count, ngpu;
584 int *uniq_ids;
586 GMX_RELEASE_ASSERT(gpu_info, "gpu_info must be a non-NULL pointer");
587 GMX_RELEASE_ASSERT(gpu_opt, "gpu_opt must be a non-NULL pointer");
589 ngpu = gpu_info->n_dev;
591 uniq_count = 0;
593 snew(uniq_ids, ngpu);
595 /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
596 * to 1 indicates that the respective GPU was selected to be used. */
597 for (i = 0; i < gpu_opt->n_dev_use; i++)
599 int device_id;
601 device_id = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
602 uniq_ids[device_id] = 1;
604 /* Count the devices used. */
605 for (i = 0; i < ngpu; i++)
607 uniq_count += uniq_ids[i];
610 sfree(uniq_ids);
612 return uniq_count;
615 static int get_ncores(const gmx::HardwareTopology &hwTop)
617 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::None)
619 return hwTop.machine().logicalProcessorCount;
621 else
623 return 0;
627 /* Return the number of hardware threads supported by the current CPU.
628 * We assume that this is equal with the number of "processors"
629 * reported to be online by the OS at the time of the call. The
630 * definition of "processor" is according to an old POSIX standard.
632 * On e.g. Arm, the Linux kernel can use advanced power saving features where
633 * processors are brought online/offline dynamically. This will cause
634 * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
635 * reason we now first try to use the number of configured processors, but
636 * also warn if they mismatch.
638 * Note that the number of hardware threads is generally greater than
639 * the number of cores (e.g. x86 hyper-threading, Power). Managing the
640 * mapping of software threads to hardware threads is managed
641 * elsewhere.
643 static int get_nthreads_hw_avail(FILE gmx_unused *fplog, const t_commrec gmx_unused *cr)
645 int ret = 0;
647 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
648 /* Windows */
649 SYSTEM_INFO sysinfo;
650 GetSystemInfo( &sysinfo );
651 ret = sysinfo.dwNumberOfProcessors;
652 #elif defined HAVE_SYSCONF
653 /* We are probably on Unix.
654 * Now check if we have the argument to use before executing the call
656 #if defined(_SC_NPROCESSORS_CONF)
657 ret = sysconf(_SC_NPROCESSORS_CONF);
658 # if defined(_SC_NPROCESSORS_ONLN)
659 if (ret != sysconf(_SC_NPROCESSORS_ONLN))
/* NOTE(review): sysconf() returns long, but it is passed below to a
 * "%d" conversion -- a varargs type mismatch on LP64 platforms;
 * presumably should be cast to int. TODO confirm. */
661 md_print_warn(cr, fplog,
662 "%d CPUs configured, but only %d of them are online.\n"
663 "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
664 "off to save power, and will turn them back on later when the load increases.\n"
665 "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
666 "will likely see much better performance by forcing all cores to be online, and\n"
667 "making sure they run at their full clock frequency.", ret, sysconf(_SC_NPROCESSORS_ONLN));
669 # endif
670 #elif defined(_SC_NPROC_CONF)
671 ret = sysconf(_SC_NPROC_CONF);
672 #elif defined(_SC_NPROCESSORS_ONLN)
673 ret = sysconf(_SC_NPROCESSORS_ONLN);
674 #elif defined(_SC_NPROC_ONLN)
675 ret = sysconf(_SC_NPROC_ONLN);
676 #else
677 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
678 #endif /* End of check for sysconf argument values */
680 #else
681 /* Neither windows nor Unix. No fscking idea how many hardware threads we have! */
682 ret = -1;
683 #endif
685 if (debug)
687 fprintf(debug, "Detected %d hardware threads to use.\n", ret);
/* Cross-check against OpenMP's own processor count; a mismatch usually
 * means affinity masks or cgroup limits restrict this process. */
690 #ifdef GMX_OPENMP
691 if (ret != gmx_omp_get_num_procs())
693 md_print_warn(cr, fplog,
694 "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
695 "Consider setting the launch configuration manually!",
696 ret, gmx_omp_get_num_procs());
698 #endif
700 return ret;
/* Run GPU detection once per physical node and share the result.
 * With library MPI, only intra-node rank 0 detects and the device table
 * is broadcast to the other ranks on that node; with thread-MPI a single
 * thread executes this. Results land in the global hwinfo_g. */
703 static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr)
705 #ifdef GMX_LIB_MPI
706 int rank_world;
707 MPI_Comm physicalnode_comm;
708 #endif
709 int rank_local;
711 /* Under certain circumstances MPI ranks on the same physical node
712 * can not simultaneously access the same GPU(s). Therefore we run
713 * the detection only on one MPI rank per node and broadcast the info.
714 * Note that with thread-MPI only a single thread runs this code.
716 * TODO: We should also do CPU hardware detection only once on each
717 * physical node and broadcast it, instead of do it on every MPI rank.
719 #ifdef GMX_LIB_MPI
720 /* A split of MPI_COMM_WORLD over physical nodes is only required here,
721 * so we create and destroy it locally.
723 MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
724 MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(),
725 rank_world, &physicalnode_comm);
726 MPI_Comm_rank(physicalnode_comm, &rank_local);
727 #else
728 /* Here there should be only one process, check this */
729 GMX_RELEASE_ASSERT(cr->nnodes == 1 && cr->sim_nodeid == 0, "Only a single (master) process should execute here");
731 rank_local = 0;
732 #endif
734 if (rank_local == 0)
736 char detection_error[STRLEN] = "", sbuf[STRLEN];
/* Detection failure is non-fatal: we warn and fall back to CPU. */
738 if (detect_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
740 if (detection_error[0] != '\0')
742 sprintf(sbuf, ":\n %s\n", detection_error);
744 else
746 sprintf(sbuf, ".");
748 md_print_warn(cr, fplog,
749 "NOTE: Error occurred during GPU detection%s"
750 " Can not use GPU acceleration, will fall back to CPU kernels.\n",
751 sbuf);
755 #ifdef GMX_LIB_MPI
756 /* Broadcast the GPU info to the other ranks within this node */
757 MPI_Bcast(&hwinfo_g->gpu_info.n_dev, 1, MPI_INT, 0, physicalnode_comm);
759 if (hwinfo_g->gpu_info.n_dev > 0)
761 int dev_size;
763 dev_size = hwinfo_g->gpu_info.n_dev*sizeof_gpu_dev_info();
/* Non-root ranks allocate the receive buffer for the device table.
 * NOTE(review): plain malloc() here, while the rest of the file uses
 * snew(); the return value is not checked -- presumably intentional
 * to match a matching free elsewhere, TODO confirm ownership. */
765 if (rank_local > 0)
767 hwinfo_g->gpu_info.gpu_dev =
768 (struct gmx_device_info_t *)malloc(dev_size);
770 MPI_Bcast(hwinfo_g->gpu_info.gpu_dev, dev_size, MPI_BYTE,
771 0, physicalnode_comm);
772 MPI_Bcast(&hwinfo_g->gpu_info.n_dev_compatible, 1, MPI_INT,
773 0, physicalnode_comm);
776 MPI_Comm_free(&physicalnode_comm);
777 #endif
/* Reduce per-node hardware counts (cores, hw threads, compatible GPUs,
 * suggested SIMD level, GPU-type hash) over all MPI ranks and store the
 * totals and min/max per node in the global hwinfo_g. Without library
 * MPI there is one node and the local values are copied directly. */
780 static void gmx_collect_hardware_mpi(const gmx::CpuInfo &cpuInfo)
782 #ifdef GMX_LIB_MPI
783 int rank_id;
784 int nrank, rank, ncore, nhwthread, ngpu, i;
785 int gpu_hash;
786 int *buf, *all;
788 rank_id = gmx_physicalnode_id_hash();
789 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
790 MPI_Comm_size(MPI_COMM_WORLD, &nrank);
791 ncore = hwinfo_g->ncore;
792 nhwthread = hwinfo_g->nthreads_hw_avail;
793 ngpu = hwinfo_g->gpu_info.n_dev_compatible;
794 /* Create a unique hash of the GPU type(s) in this node */
795 gpu_hash = 0;
796 /* Here it might be better to only loop over the compatible GPU, but we
797 * don't have that information available and it would also require
798 * removing the device ID from the device info string.
800 for (i = 0; i < hwinfo_g->gpu_info.n_dev; i++)
802 char stmp[STRLEN];
804 /* Since the device ID is incorporated in the hash, the order of
805 * the GPUs affects the hash. Also two identical GPUs won't give
806 * a gpu_hash of zero after XORing.
808 get_gpu_device_info_string(stmp, &hwinfo_g->gpu_info, i);
809 gpu_hash ^= gmx_string_fullhash_func(stmp, gmx_string_hash_init);
/* Gather all ranks' physical-node hashes so each rank can work out
 * whether it is the first rank on its node. */
812 snew(buf, nrank);
813 snew(all, nrank);
814 buf[rank] = rank_id;
816 MPI_Allreduce(buf, all, nrank, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
818 gmx_bool bFound;
819 int nnode0, ncore0, nhwthread0, ngpu0, r;
/* Only the first rank on each node contributes non-zero values to the
 * per-node sums, so nodes are counted once regardless of rank count. */
821 bFound = FALSE;
822 ncore0 = 0;
823 nnode0 = 0;
824 nhwthread0 = 0;
825 ngpu0 = 0;
826 for (r = 0; r < nrank; r++)
828 if (all[r] == rank_id)
830 if (!bFound && r == rank)
832 /* We are the first rank in this physical node */
833 nnode0 = 1;
834 ncore0 = ncore;
835 nhwthread0 = nhwthread;
836 ngpu0 = ngpu;
838 bFound = TRUE;
842 sfree(buf);
843 sfree(all);
845 int sum[4], maxmin[10];
848 int buf[4];
850 /* Sum values from only intra-rank 0 so we get the sum over all nodes */
851 buf[0] = nnode0;
852 buf[1] = ncore0;
853 buf[2] = nhwthread0;
854 buf[3] = ngpu0;
856 MPI_Allreduce(buf, sum, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
860 int buf[10];
862 /* Store + and - values for all ranks,
863 * so we can get max+min with one MPI call.
865 buf[0] = ncore;
866 buf[1] = nhwthread;
867 buf[2] = ngpu;
868 buf[3] = static_cast<int>(gmx::simdSuggested(cpuInfo));
869 buf[4] = gpu_hash;
870 buf[5] = -buf[0];
871 buf[6] = -buf[1];
872 buf[7] = -buf[2];
873 buf[8] = -buf[3];
874 buf[9] = -buf[4];
876 MPI_Allreduce(buf, maxmin, 10, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
/* maxmin[0..4] hold the maxima, -maxmin[5..9] the minima of the
 * corresponding buf entries. Identical GPUs everywhere iff the max and
 * min GPU hashes agree. */
879 hwinfo_g->nphysicalnode = sum[0];
880 hwinfo_g->ncore_tot = sum[1];
881 hwinfo_g->ncore_min = -maxmin[5];
882 hwinfo_g->ncore_max = maxmin[0];
883 hwinfo_g->nhwthread_tot = sum[2];
884 hwinfo_g->nhwthread_min = -maxmin[6];
885 hwinfo_g->nhwthread_max = maxmin[1];
886 hwinfo_g->ngpu_compatible_tot = sum[3];
887 hwinfo_g->ngpu_compatible_min = -maxmin[7];
888 hwinfo_g->ngpu_compatible_max = maxmin[2];
889 hwinfo_g->simd_suggest_min = -maxmin[8];
890 hwinfo_g->simd_suggest_max = maxmin[3];
891 hwinfo_g->bIdenticalGPUs = (maxmin[4] == -maxmin[9]);
892 #else
893 /* All ranks use the same pointer, protect it with a mutex */
894 tMPI_Thread_mutex_lock(&hw_info_lock);
895 hwinfo_g->nphysicalnode = 1;
896 hwinfo_g->ncore_tot = hwinfo_g->ncore;
897 hwinfo_g->ncore_min = hwinfo_g->ncore;
898 hwinfo_g->ncore_max = hwinfo_g->ncore;
899 hwinfo_g->nhwthread_tot = hwinfo_g->nthreads_hw_avail;
900 hwinfo_g->nhwthread_min = hwinfo_g->nthreads_hw_avail;
901 hwinfo_g->nhwthread_max = hwinfo_g->nthreads_hw_avail;
902 hwinfo_g->ngpu_compatible_tot = hwinfo_g->gpu_info.n_dev_compatible;
903 hwinfo_g->ngpu_compatible_min = hwinfo_g->gpu_info.n_dev_compatible;
904 hwinfo_g->ngpu_compatible_max = hwinfo_g->gpu_info.n_dev_compatible;
905 hwinfo_g->simd_suggest_min = static_cast<int>(simdSuggested(cpuInfo));
906 hwinfo_g->simd_suggest_max = static_cast<int>(simdSuggested(cpuInfo));
907 hwinfo_g->bIdenticalGPUs = TRUE;
908 tMPI_Thread_mutex_unlock(&hw_info_lock);
909 #endif
/* Detect CPU, topology and (optionally) GPUs, returning the global
 * reference-counted hwinfo_g. The mutex serializes concurrent callers;
 * only the first caller performs detection (n_hwinfo == 0). */
912 gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
913 gmx_bool bDetectGPUs)
915 int ret;
917 /* make sure no one else is doing the same thing */
918 ret = tMPI_Thread_mutex_lock(&hw_info_lock);
919 if (ret != 0)
921 gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
924 /* only initialize the hwinfo structure if it is not already initalized */
925 if (n_hwinfo == 0)
927 snew(hwinfo_g, 1);
929 hwinfo_g->cpuInfo = new gmx::CpuInfo(gmx::CpuInfo::detect());
930 hwinfo_g->hardwareTopology = new gmx::HardwareTopology(gmx::HardwareTopology::detect());
932 /* get the number of cores, will be 0 when not detected */
933 hwinfo_g->ncore = get_ncores(*hwinfo_g->hardwareTopology);
935 /* detect number of hardware threads */
936 hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
938 /* detect GPUs */
939 hwinfo_g->gpu_info.n_dev = 0;
940 hwinfo_g->gpu_info.n_dev_compatible = 0;
941 hwinfo_g->gpu_info.gpu_dev = NULL;
943 /* Run the detection if the binary was compiled with GPU support
944 * and we requested detection.
946 hwinfo_g->gpu_info.bDetectGPUs =
947 (bGPUBinary && bDetectGPUs &&
948 getenv("GMX_DISABLE_GPU_DETECTION") == NULL);
949 if (hwinfo_g->gpu_info.bDetectGPUs)
951 gmx_detect_gpus(fplog, cr);
954 /* increase the reference counter */
955 n_hwinfo++;
957 ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
958 if (ret != 0)
960 gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
/* The MPI reduction runs outside the mutex on every call; it is a
 * collective operation, so all ranks must reach it. */
963 gmx_collect_hardware_mpi(*hwinfo_g->cpuInfo);
965 return hwinfo_g;
/* Build the multi-line "Running on ... / Hardware detected" report shown
 * in the log and on stderr. bFullCpuInfo additionally includes CPU
 * family/model/stepping and the feature list. */
968 static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
969 bool bFullCpuInfo)
971 std::string s;
/* NOTE(review): this reads the global hwinfo_g rather than the hwinfo
 * parameter; they refer to the same object today, but using the
 * parameter would be consistent with the signature -- TODO confirm. */
973 const gmx::CpuInfo &cpuInfo = *hwinfo_g->cpuInfo;
975 s = gmx::formatString("\n");
976 s += gmx::formatString("Running on %d node%s with total",
977 hwinfo->nphysicalnode,
978 hwinfo->nphysicalnode == 1 ? "" : "s");
979 if (hwinfo->ncore_tot > 0)
981 s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
983 s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
984 if (hwinfo->gpu_info.bDetectGPUs)
986 s += gmx::formatString(", %d compatible GPU%s",
987 hwinfo->ngpu_compatible_tot,
988 hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
990 else if (bGPUBinary)
992 s += gmx::formatString(" (GPU detection deactivated)");
994 s += gmx::formatString("\n");
/* With several physical nodes also report the per-node min-max ranges */
996 if (hwinfo->nphysicalnode > 1)
998 /* Print per node hardware feature counts */
999 if (hwinfo->ncore_max > 0)
1001 s += gmx::formatString(" Cores per node: %2d", hwinfo->ncore_min);
1002 if (hwinfo->ncore_max > hwinfo->ncore_min)
1004 s += gmx::formatString(" - %2d", hwinfo->ncore_max);
1006 s += gmx::formatString("\n");
1008 s += gmx::formatString(" Logical cores per node: %2d", hwinfo->nhwthread_min);
1009 if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
1011 s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
1013 s += gmx::formatString("\n");
1014 if (bGPUBinary)
1016 s += gmx::formatString(" Compatible GPUs per node: %2d",
1017 hwinfo->ngpu_compatible_min);
1018 if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
1020 s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
1022 s += gmx::formatString("\n");
1023 if (hwinfo->ngpu_compatible_tot > 0)
1025 if (hwinfo->bIdenticalGPUs)
1027 s += gmx::formatString(" All nodes have identical type(s) of GPUs\n");
1029 else
1031 /* This message will also appear with identical GPU types
1032 * when at least one node has no GPU.
1034 s += gmx::formatString(" Different nodes have different type(s) and/or order of GPUs\n");
1040 #ifdef GMX_LIB_MPI
1041 char host[HOSTNAMELEN];
1042 int rank;
1044 gmx_gethostname(host, HOSTNAMELEN);
1045 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1047 s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
1048 host, rank);
1049 #else
1050 s += gmx::formatString("Hardware detected:\n");
1051 #endif
1052 s += gmx::formatString(" CPU info:\n");
1054 s += gmx::formatString(" Vendor: %s\n", cpuInfo.vendorString().c_str());
1056 s += gmx::formatString(" Brand: %s\n", cpuInfo.brandString().c_str());
1058 if (bFullCpuInfo)
1060 s += gmx::formatString(" Family: %d Model: %d Stepping: %d\n",
1061 cpuInfo.family(), cpuInfo.model(), cpuInfo.stepping());
1063 s += gmx::formatString(" Features:");
1064 for (auto &f : cpuInfo.featureSet())
1066 s += gmx::formatString(" %s", cpuInfo.featureString(f).c_str());;
1068 s += gmx::formatString("\n");
1071 s += gmx::formatString(" SIMD instructions most likely to fit this hardware: %s",
1072 gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min)).c_str());
1074 if (hwinfo->simd_suggest_max > hwinfo->simd_suggest_min)
1076 s += gmx::formatString(" - %s", gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_max)).c_str());
1078 s += gmx::formatString("\n");
1080 s += gmx::formatString(" SIMD instructions selected at GROMACS compile time: %s\n",
1081 gmx::simdString(gmx::simdCompiled()).c_str());
1083 if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
1084 hwinfo->gpu_info.n_dev > 0))
1086 s += gmx::formatString(" GPU info:\n");
1087 s += gmx::formatString(" Number of GPUs detected: %d\n",
1088 hwinfo->gpu_info.n_dev);
1089 if (hwinfo->gpu_info.n_dev > 0)
1091 char buf[STRLEN];
1093 sprint_gpus(buf, &hwinfo->gpu_info);
1094 s += gmx::formatString("%s\n", buf);
1097 return s;
1100 void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
1101 const gmx_hw_info_t *hwinfo)
1103 const gmx::CpuInfo &cpuInfo = *hwinfo_g->cpuInfo;
1105 if (fplog != NULL)
1107 std::string detected;
1109 detected = detected_hardware_string(hwinfo, TRUE);
1111 fprintf(fplog, "%s\n", detected.c_str());
1114 if (MULTIMASTER(cr))
1116 std::string detected;
1118 detected = detected_hardware_string(hwinfo, FALSE);
1120 fprintf(stderr, "%s\n", detected.c_str());
1123 /* Check the compiled SIMD instruction set against that of the node
1124 * with the lowest SIMD level support (skip if SIMD detection did not work)
1126 if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
1128 gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, MULTIMASTER(cr));
1131 /* For RDTSCP we only check on our local node and skip the MPI reduction */
1132 check_use_of_rdtscp_on_this_cpu(fplog, cr, cpuInfo);
1135 //! \brief Return if any GPU ID (e.g in a user-supplied string) is repeated
1136 static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
1138 /* Loop over IDs in the string */
1139 for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
1141 /* Look for the ID in location i in the following part of the
1142 string */
1143 for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
1145 if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
1147 /* Same ID found in locations i and j */
1148 return TRUE;
1153 return FALSE;
1156 void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
1158 char *env;
1160 if (gpu_opt->gpu_id != NULL && !bGPUBinary)
1162 gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!",
1163 gmx::getProgramContext().displayName());
1166 env = getenv("GMX_GPU_ID");
1167 if (env != NULL && gpu_opt->gpu_id != NULL)
1169 gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
1171 if (env == NULL)
1173 env = gpu_opt->gpu_id;
1176 /* parse GPU IDs if the user passed any */
1177 if (env != NULL)
1179 /* Parse a "plain" or comma-separated GPU ID string which contains a
1180 * sequence of digits corresponding to GPU IDs; the order will
1181 * indicate the process/tMPI thread - GPU assignment. */
1182 parse_digits_from_string(env, &gpu_opt->n_dev_use, &gpu_opt->dev_use);
1184 if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
1186 gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", getGpuImplementationString());
1188 if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
1190 gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", getGpuImplementationString());
1192 if (gpu_opt->n_dev_use == 0)
1194 gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
1195 invalid_gpuid_hint);
1198 gpu_opt->bUserSet = TRUE;
/*! \brief Validate user-selected GPU IDs, or automatically select GPUs.
 *
 * Must be called after gmx_parse_gpu_ids. Non-PP ranks return early and
 * use no GPU. All failure modes terminate via gmx_fatal.
 *
 * \param[in]  fplog        Log file, may be NULL.
 * \param[in]  cr           Communication record of this rank.
 * \param[in]  gpu_info     Detected GPU information.
 * \param[in]  bForceUseGPU Whether GPU use was explicitly requested (-nb gpu).
 * \param[in,out] gpu_opt   GPU options; dev_use/n_dev_use are filled in.
 */
void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
                        const gmx_gpu_info_t *gpu_info,
                        gmx_bool bForceUseGPU,
                        gmx_gpu_opt_t *gpu_opt)
{
    int i;
    char sbuf[STRLEN], stmp[STRLEN];

    /* Bail if binary is not compiled with GPU acceleration, but this is either
     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
    if (bForceUseGPU && !bGPUBinary)
    {
        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!",
                  gmx::getProgramContext().displayName());
    }

    if (!(cr->duty & DUTY_PP))
    {
        /* Our rank is not doing PP, we don't use a GPU */
        return;
    }

    if (gpu_opt->bUserSet)
    {
        /* Check the GPU IDs passed by the user.
         * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
         */
        int *checkres;
        int res;

        snew(checkres, gpu_opt->n_dev_use);

        /* checkres[i] gets a per-device status code (egpu*) for each
         * requested device; res is overall success. */
        res = check_selected_gpus(checkres, gpu_info, gpu_opt);

        if (!res)
        {
            print_gpu_detection_stats(fplog, gpu_info, cr);

            /* Build one combined message listing every bad device.
             * NOTE(review): sbuf/stmp are fixed STRLEN buffers filled with
             * sprintf/strcat — assumes the list fits; confirm for large
             * device counts. */
            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
            for (i = 0; i < gpu_opt->n_dev_use; i++)
            {
                if (checkres[i] != egpuCompatible)
                {
                    sprintf(stmp, " GPU #%d: %s\n",
                            gpu_opt->dev_use[i],
                            gpu_detect_res_str[checkres[i]]);
                    strcat(sbuf, stmp);
                }
            }
            gmx_fatal(FARGS, "%s", sbuf);
        }

        sfree(checkres);
    }
    else if (getenv("GMX_EMULATE_GPU") == NULL)
    {
        /* Automatic selection: pick compatible GPUs and assign them to the
         * PP ranks within this physical node.
         * NOTE(review): uses the global hwinfo_g (non-const) rather than the
         * const gpu_info parameter — presumably they refer to the same data;
         * verify. */
        pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
        set_gpu_ids(gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
    }

    /* If the user asked for a GPU, check whether we have a GPU */
    if (bForceUseGPU && gpu_info->n_dev_compatible == 0)
    {
        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
    }
}
1269 /* Select the GPUs we will use. This is an operation local to each physical
1270 * node. If we have less MPI ranks than GPUs, we will waste some GPUs.
1271 * nrank and rank are the rank count and id for PP processes in our node.
1273 static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
1275 GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
1276 GMX_RELEASE_ASSERT(nrank >= 1,
1277 gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
1278 rank, gpu_opt->n_dev_compatible).c_str());
1280 if (gpu_opt->n_dev_compatible == 0)
1282 char host[HOSTNAMELEN];
1284 gmx_gethostname(host, HOSTNAMELEN);
1285 gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
1288 int nshare;
1290 nshare = 1;
1291 if (nrank > gpu_opt->n_dev_compatible)
1293 if (nrank % gpu_opt->n_dev_compatible == 0)
1295 nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
1297 else
1299 if (rank == 0)
1301 gmx_fatal(FARGS, "The number of MPI ranks (%d) in a physical node is not a multiple of the number of GPUs (%d). Select a different number of MPI ranks or use the -gpu_id option to manually specify the GPU to be used.",
1302 nrank, gpu_opt->n_dev_compatible);
1305 #ifdef GMX_MPI
1306 /* We use a global barrier to prevent ranks from continuing with
1307 * an invalid setup.
1309 MPI_Barrier(MPI_COMM_WORLD);
1310 #endif
1314 /* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
1315 gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
1316 if (!gmx_multiple_gpu_per_node_supported())
1318 gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
1320 snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
1321 for (int i = 0; i != gpu_opt->n_dev_use; ++i)
1323 /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
1324 gpu_opt->dev_use[i] = gpu_opt->dev_compatible[i/nshare];
1328 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
1330 int ret;
1332 ret = tMPI_Thread_mutex_lock(&hw_info_lock);
1333 if (ret != 0)
1335 gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
1338 /* decrease the reference counter */
1339 n_hwinfo--;
1342 if (hwinfo != hwinfo_g)
1344 gmx_incons("hwinfo < hwinfo_g");
1347 if (n_hwinfo < 0)
1349 gmx_incons("n_hwinfo < 0");
1352 if (n_hwinfo == 0)
1354 delete hwinfo_g->cpuInfo;
1355 delete hwinfo_g->hardwareTopology;
1356 free_gpu_info(&hwinfo_g->gpu_info);
1357 sfree(hwinfo_g);
1360 ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
1361 if (ret != 0)
1363 gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));