Move gpu_utils etc. out of gmxlib
[gromacs.git] / src / gromacs / gmxlib / gmx_detect_hardware.cpp
blobf4edd0eabbd9a7de45c10fe2bb87b697f6af9dc1
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include "gmx_detect_hardware.h"
39 #include "config.h"
41 #include <cerrno>
42 #include <cstdlib>
43 #include <cstring>
45 #include <algorithm>
46 #include <string>
47 #include <vector>
49 #ifdef HAVE_UNISTD_H
50 /* For sysconf */
51 #include <unistd.h>
52 #endif
53 #ifdef GMX_NATIVE_WINDOWS
54 #include <windows.h>
55 #endif
57 #include "thread_mpi/threads.h"
59 #include "gromacs/gmxlib/md_logging.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gpu_utils/gpu_utils.h"
62 #include "gromacs/hardware/cpuinfo.h"
63 #include "gromacs/hardware/gpu_hw_info.h"
64 #include "gromacs/hardware/hardwaretopology.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/md_enums.h"
68 #include "gromacs/simd/support.h"
69 #include "gromacs/utility/arrayref.h"
70 #include "gromacs/utility/basedefinitions.h"
71 #include "gromacs/utility/basenetwork.h"
72 #include "gromacs/utility/baseversion.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/gmxassert.h"
77 #include "gromacs/utility/gmxomp.h"
78 #include "gromacs/utility/programcontext.h"
79 #include "gromacs/utility/smalloc.h"
80 #include "gromacs/utility/stringutil.h"
81 #include "gromacs/utility/sysinfo.h"
/* Compile-time capability flags and globally shared detection state.
 * NOTE(review): the arrays below are indexed by GMX_GPU and must stay in
 * sync with the GPU support enumeration in src/config.h.cmakein. */
83 static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;
85 /* Note that some of the following arrays must match the "GPU support
86 * enumeration" in src/config.h.cmakein, so that GMX_GPU looks up an
87 * array entry. */
89 /* CUDA supports everything. Our current OpenCL implementation only
90 * supports using exactly one GPU per PP rank, so sharing is
91 * impossible */
92 static const bool gpuSharingSupport[] = { false, true, false };
93 static const bool bGpuSharingSupported = gpuSharingSupport[GMX_GPU];
95 /* CUDA supports everything. Our current OpenCL implementation seems
96 * to handle concurrency correctly with thread-MPI. The AMD OpenCL
97 * runtime does not seem to support creating a context from more than
98 * one real MPI rank on the same node (it segfaults when you try).
100 static const bool multiGpuSupport[] = {
101 false, true,
102 #ifdef GMX_THREAD_MPI
103 true,
104 #else
105 false, /* Real MPI and no MPI */
106 #endif
108 static const bool bMultiGpuPerNodeSupported = multiGpuSupport[GMX_GPU];
110 /* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
111 const char * const gpu_detect_res_str[egpuNR] =
113 "compatible", "inexistent", "incompatible", "insane"
116 static const char * invalid_gpuid_hint =
117 "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
119 /* The globally shared hwinfo structure. */
120 static gmx_hw_info_t *hwinfo_g;
/* hwinfo_g is reference-counted (n_hwinfo) and all access to it is
 * serialized through hw_info_lock; see gmx_detect_hardware(). */
121 /* A reference counter for the hwinfo structure */
122 static int n_hwinfo = 0;
123 /* A lock to protect the hwinfo structure */
124 static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
126 #define HOSTNAMELEN 80
128 /* FW decl. */
129 static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank);
130 static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
131 const gmx_gpu_opt_t *gpu_opt);
133 gmx_bool gmx_multiple_gpu_per_node_supported()
135 return bMultiGpuPerNodeSupported;
138 gmx_bool gmx_gpu_sharing_supported()
140 return bGpuSharingSupported;
143 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
145 int i, ndev;
146 char stmp[STRLEN];
148 ndev = gpu_info->n_dev;
150 sbuf[0] = '\0';
151 for (i = 0; i < ndev; i++)
153 get_gpu_device_info_string(stmp, gpu_info, i);
154 strcat(sbuf, " ");
155 strcat(sbuf, stmp);
156 if (i < ndev - 1)
158 strcat(sbuf, "\n");
163 static void print_gpu_detection_stats(FILE *fplog,
164 const gmx_gpu_info_t *gpu_info,
165 const t_commrec *cr)
167 char onhost[HOSTNAMELEN+10], stmp[STRLEN];
168 int ngpu;
170 if (!gpu_info->bDetectGPUs)
172 /* We skipped the detection, so don't print detection stats */
173 return;
176 ngpu = gpu_info->n_dev;
178 #if defined GMX_MPI && !defined GMX_THREAD_MPI
179 /* We only print the detection on one, of possibly multiple, nodes */
180 std::strncpy(onhost, " on host ", 10);
181 gmx_gethostname(onhost + 9, HOSTNAMELEN);
182 #else
183 /* We detect all relevant GPUs */
184 std::strncpy(onhost, "", 1);
185 #endif
187 if (ngpu > 0)
189 sprint_gpus(stmp, gpu_info);
190 md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
191 ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
193 else
195 md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
199 /*! \brief Helper function for reporting GPU usage information
200 * in the mdrun log file
202 * \param[in] gpu_info Pointer to per-node GPU info struct
203 * \param[in] gpu_opt Pointer to per-node GPU options struct
204 * \param[in] numPpRanks Number of PP ranks per node
205 * \param[in] bPrintHostName Print the hostname in the usage information
206 * \return String to write to the log file
207 * \throws std::bad_alloc if out of memory */
208 static std::string
209 makeGpuUsageReport(const gmx_gpu_info_t *gpu_info,
210 const gmx_gpu_opt_t *gpu_opt,
211 size_t numPpRanks,
212 bool bPrintHostName)
214 int ngpu_use = gpu_opt->n_dev_use;
215 int ngpu_comp = gpu_info->n_dev_compatible;
216 char host[HOSTNAMELEN];
218 if (bPrintHostName)
220 gmx_gethostname(host, HOSTNAMELEN);
223 /* Issue a note if GPUs are available but not used */
224 if (ngpu_comp > 0 && ngpu_use < 1)
226 return gmx::formatString("%d compatible GPU%s detected in the system, but none will be used.\n"
227 "Consider trying GPU acceleration with the Verlet scheme!\n",
228 ngpu_comp, (ngpu_comp > 1) ? "s" : "");
231 std::string output;
232 if (!gpu_opt->bUserSet)
234 // gpu_opt->dev_compatible is only populated during auto-selection
235 std::string gpuIdsString =
236 formatAndJoin(gmx::constArrayRefFromArray(gpu_opt->dev_compatible,
237 gpu_opt->n_dev_compatible),
238 ",", gmx::StringFormatter("%d"));
239 bool bPluralGpus = gpu_opt->n_dev_compatible > 1;
241 if (bPrintHostName)
243 output += gmx::formatString("On host %s ", host);
245 output += gmx::formatString("%d compatible GPU%s %s present, with ID%s %s\n",
246 gpu_opt->n_dev_compatible,
247 bPluralGpus ? "s" : "",
248 bPluralGpus ? "are" : "is",
249 bPluralGpus ? "s" : "",
250 gpuIdsString.c_str());
254 std::vector<int> gpuIdsInUse;
255 for (int i = 0; i < ngpu_use; i++)
257 gpuIdsInUse.push_back(get_gpu_device_id(gpu_info, gpu_opt, i));
259 std::string gpuIdsString =
260 formatAndJoin(gpuIdsInUse, ",", gmx::StringFormatter("%d"));
261 int numGpusInUse = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
262 bool bPluralGpus = numGpusInUse > 1;
264 if (bPrintHostName)
266 output += gmx::formatString("On host %s ", host);
268 output += gmx::formatString("%d GPU%s %sselected for this run.\n"
269 "Mapping of GPU ID%s to the %d PP rank%s in this node: %s\n",
270 numGpusInUse, bPluralGpus ? "s" : "",
271 gpu_opt->bUserSet ? "user-" : "auto-",
272 bPluralGpus ? "s" : "",
/* NOTE(review): numPpRanks is size_t but is formatted with %d above --
 * a varargs type mismatch on LP64 platforms; presumably should be cast
 * to int or formatted with %zu. TODO confirm and fix. */
273 numPpRanks,
274 (numPpRanks > 1) ? "s" : "",
275 gpuIdsString.c_str());
278 return output;
281 /* Give a suitable fatal error or warning if the build configuration
282 and runtime CPU do not match. */
283 static void
284 check_use_of_rdtscp_on_this_cpu(FILE *fplog,
285 const t_commrec *cr,
286 const gmx::CpuInfo &cpuInfo)
288 #ifdef HAVE_RDTSCP
289 bool binaryUsesRdtscp = TRUE;
290 #else
291 bool binaryUsesRdtscp = FALSE;
292 #endif
294 const char *programName = gmx::getProgramContext().displayName();
296 if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
298 if (binaryUsesRdtscp)
300 md_print_warn(cr, fplog, "The %s executable was compiled to use the rdtscp CPU instruction. "
301 "We cannot detect the features of your current CPU, but will proceed anyway. "
302 "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
303 programName);
306 else
308 bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);
310 if (!cpuHasRdtscp && binaryUsesRdtscp)
312 gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
313 "However, this is not supported by the current hardware and continuing would lead to a crash. "
314 "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
315 programName);
318 if (cpuHasRdtscp && !binaryUsesRdtscp)
320 md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n"
321 "%s was configured to use. This might affect your simulation\n"
322 "speed as accurate timings are needed for load-balancing.\n"
323 "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.\n",
324 programName, programName);
/* Verify that the requested run configuration (MPI flavour, rank counts,
 * GPU assignment) is consistent with the detected hardware, emitting
 * notes, warnings, or fatal errors as appropriate. PME-only ranks are
 * exempt from these PP/GPU checks. */
329 void gmx_check_hw_runconf_consistency(FILE *fplog,
330 const gmx_hw_info_t *hwinfo,
331 const t_commrec *cr,
332 const gmx_hw_opt_t *hw_opt,
333 gmx_bool bUseGPU)
335 int npppn;
336 char th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
337 gmx_bool btMPI, bMPI, bNthreadsAuto, bEmulateGPU;
339 GMX_RELEASE_ASSERT(hwinfo, "hwinfo must be a non-NULL pointer");
340 GMX_RELEASE_ASSERT(cr, "cr must be a non-NULL pointer");
342 /* Below we only do consistency checks for PP and GPUs,
343 * this is irrelevant for PME only nodes, so in that case we return
344 * here.
346 if (!(cr->duty & DUTY_PP))
348 return;
/* Classify the MPI flavour; with thread-MPI the thread count may still
 * be auto-determined (nthreads_tmpi < 1). */
351 #if defined(GMX_THREAD_MPI)
352 bMPI = FALSE;
353 btMPI = TRUE;
354 bNthreadsAuto = (hw_opt->nthreads_tmpi < 1);
355 #elif defined(GMX_LIB_MPI)
356 bMPI = TRUE;
357 btMPI = FALSE;
358 bNthreadsAuto = FALSE;
359 #else
360 bMPI = FALSE;
361 btMPI = FALSE;
362 bNthreadsAuto = FALSE;
363 #endif
365 /* GPU emulation detection is done later, but we need here as well
366 * -- uncool, but there's no elegant workaround */
367 bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
369 if (hwinfo->gpu_info.n_dev_compatible > 0)
371 std::string gpuUseageReport;
374 gpuUseageReport = makeGpuUsageReport(&hwinfo->gpu_info,
375 &hw_opt->gpu_opt,
376 cr->nrank_pp_intranode,
377 bMPI && cr->nnodes > 1);
379 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
381 /* NOTE: this print is only for and on one physical node */
382 md_print_info(cr, fplog, "%s\n", gpuUseageReport.c_str());
385 /* Need to ensure that we have enough GPUs:
386 * - need one GPU per PP node
387 * - no GPU oversubscription with tMPI
388 * */
389 /* number of PP processes per node */
390 npppn = cr->nrank_pp_intranode;
/* Build grammatical fragments ("thread-MPI thread(s)", "MPI process(es)",
 * "process") reused in all messages below. */
392 pernode[0] = '\0';
393 th_or_proc_plural[0] = '\0';
394 if (btMPI)
396 sprintf(th_or_proc, "thread-MPI thread");
397 if (npppn > 1)
399 sprintf(th_or_proc_plural, "s");
402 else if (bMPI)
404 sprintf(th_or_proc, "MPI process");
405 if (npppn > 1)
407 sprintf(th_or_proc_plural, "es");
409 sprintf(pernode, " per node");
411 else
413 /* neither MPI nor tMPI */
414 sprintf(th_or_proc, "process");
417 if (bUseGPU && hwinfo->gpu_info.n_dev_compatible > 0 &&
418 !bEmulateGPU)
420 int ngpu_comp, ngpu_use;
421 char gpu_comp_plural[2], gpu_use_plural[2];
423 ngpu_comp = hwinfo->gpu_info.n_dev_compatible;
424 ngpu_use = hw_opt->gpu_opt.n_dev_use;
426 sprintf(gpu_comp_plural, "%s", (ngpu_comp > 1) ? "s" : "");
427 sprintf(gpu_use_plural, "%s", (ngpu_use > 1) ? "s" : "");
429 const char *programName = gmx::getProgramContext().displayName();
431 /* number of tMPI threads auto-adjusted */
432 if (btMPI && bNthreadsAuto)
434 if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use)
436 /* The user manually provided more GPUs than threads we
437 could automatically start. */
/* NOTE(review): the message below contains the typos "coud" and
 * "tread-MPI" (should be "could" / "thread-MPI") -- user-facing
 * strings, fix in a code change, not here. */
438 gmx_fatal(FARGS,
439 "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
440 "%s requires one PP tread-MPI thread per GPU; use fewer GPUs.",
441 ngpu_use, gpu_use_plural,
442 npppn, th_or_proc_plural,
443 programName);
446 if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp)
448 /* There are more GPUs than tMPI threads; we have
449 limited the number GPUs used. */
450 md_print_warn(cr, fplog,
451 "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
452 " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.\n",
453 ngpu_comp, gpu_comp_plural,
454 npppn, th_or_proc_plural,
455 programName, npppn,
456 npppn > 1 ? "s" : "");
460 if (hw_opt->gpu_opt.bUserSet)
462 if (ngpu_use != npppn)
464 gmx_fatal(FARGS,
465 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
466 "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
467 th_or_proc, btMPI ? "s" : "es", pernode,
468 programName, npppn, th_or_proc,
469 th_or_proc_plural, pernode,
470 ngpu_use, gpu_use_plural);
473 else
475 /* TODO Should we have a gpu_opt->n_dev_supported field? */
476 if (ngpu_comp > npppn && gmx_multiple_gpu_per_node_supported())
478 md_print_warn(cr, fplog,
479 "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
480 " PP %s%s%s than GPU%s available.\n"
481 " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
482 programName, th_or_proc,
483 th_or_proc_plural, pernode, gpu_comp_plural,
484 th_or_proc, npppn, gpu_use_plural, pernode);
487 if (ngpu_use != npppn)
489 /* Avoid duplicate error messages.
490 * Unfortunately we can only do this at the physical node
491 * level, since the hardware setup and MPI process count
492 * might differ between physical nodes.
494 if (cr->rank_pp_intranode == 0)
496 std::string reasonForLimit;
497 if (ngpu_comp > 1 &&
498 ngpu_use == 1 &&
499 !gmx_multiple_gpu_per_node_supported())
501 reasonForLimit = "can be used by ";
502 reasonForLimit += getGpuImplementationString();
503 reasonForLimit += " in GROMACS";
505 else
507 reasonForLimit = "was detected";
509 gmx_fatal(FARGS,
510 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
511 "%s was started with %d PP %s%s%s, but only %d GPU%s %s.",
512 th_or_proc, btMPI ? "s" : "es", pernode,
513 programName, npppn, th_or_proc,
514 th_or_proc_plural, pernode,
515 ngpu_use, gpu_use_plural, reasonForLimit.c_str());
/* Note any GPU IDs shared by several PP ranks (only possible with
 * user-set IDs); informational, not an error. */
521 int same_count;
523 same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt);
525 if (same_count > 0)
527 md_print_info(cr, fplog,
528 "NOTE: You assigned %s to multiple %s%s.\n",
529 same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
534 #ifdef GMX_MPI
535 if (PAR(cr))
537 /* Avoid other ranks to continue after
538 inconsistency */
539 MPI_Barrier(cr->mpi_comm_mygroup);
541 #endif
545 /* Return 0 if none of the GPU (per node) are shared among PP ranks.
547 * Sharing GPUs among multiple PP ranks is possible when the user passes
548 * GPU IDs. Here we check for sharing and return a non-zero value when
549 * this is detected. Note that the return value represents the number of
550 * PP rank pairs that share a device.
552 int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
554 int same_count = 0;
555 int ngpu = gpu_opt->n_dev_use;
557 if (gpu_opt->bUserSet)
559 int i, j;
561 for (i = 0; i < ngpu - 1; i++)
563 for (j = i + 1; j < ngpu; j++)
565 same_count += (gpu_opt->dev_use[i] ==
566 gpu_opt->dev_use[j]);
571 return same_count;
574 /* Count and return the number of unique GPUs (per node) selected.
576 * As sharing GPUs among multiple PP ranks is possible when the user passes
577 * GPU IDs, the number of GPUs user (per node) can be different from the
578 * number of GPU IDs selected.
580 static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
581 const gmx_gpu_opt_t *gpu_opt)
583 int i, uniq_count, ngpu;
584 int *uniq_ids;
586 GMX_RELEASE_ASSERT(gpu_info, "gpu_info must be a non-NULL pointer");
587 GMX_RELEASE_ASSERT(gpu_opt, "gpu_opt must be a non-NULL pointer");
589 ngpu = gpu_info->n_dev;
591 uniq_count = 0;
593 snew(uniq_ids, ngpu);
595 /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
596 * to 1 indicates that the respective GPU was selected to be used. */
597 for (i = 0; i < gpu_opt->n_dev_use; i++)
599 int device_id;
601 device_id = gmx_gpu_sharing_supported() ? get_gpu_device_id(gpu_info, gpu_opt, i) : i;
602 uniq_ids[device_id] = 1;
604 /* Count the devices used. */
605 for (i = 0; i < ngpu; i++)
607 uniq_count += uniq_ids[i];
610 sfree(uniq_ids);
612 return uniq_count;
615 static int get_ncores(const gmx::HardwareTopology &hwTop)
617 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::None)
619 return hwTop.machine().logicalProcessorCount;
621 else
623 return 0;
627 /* Return the number of hardware threads supported by the current CPU.
628 * We assume that this is equal with the number of "processors"
629 * reported to be online by the OS at the time of the call. The
630 * definition of "processor" is according to an old POSIX standard.
632 * On e.g. Arm, the Linux kernel can use advanced power saving features where
633 * processors are brought online/offline dynamically. This will cause
634 * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
635 * reason we now first try to use the number of configured processors, but
636 * also warn if they mismatch.
638 * Note that the number of hardware threads is generally greater than
639 * the number of cores (e.g. x86 hyper-threading, Power). Managing the
640 * mapping of software threads to hardware threads is managed
641 * elsewhere.
643 static int get_nthreads_hw_avail(FILE gmx_unused *fplog, const t_commrec gmx_unused *cr)
645 int ret = 0;
647 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
648 /* Windows */
649 SYSTEM_INFO sysinfo;
650 GetSystemInfo( &sysinfo );
651 ret = sysinfo.dwNumberOfProcessors;
652 #elif defined HAVE_SYSCONF
653 /* We are probably on Unix.
654 * Now check if we have the argument to use before executing the call
656 #if defined(_SC_NPROCESSORS_CONF)
657 ret = sysconf(_SC_NPROCESSORS_CONF);
658 # if defined(_SC_NPROCESSORS_ONLN)
659 if (ret != sysconf(_SC_NPROCESSORS_ONLN))
/* NOTE(review): sysconf() returns long, but it is passed below to a
 * "%d" conversion -- a varargs type mismatch on LP64 platforms;
 * presumably should be cast to int. TODO confirm. */
661 md_print_warn(cr, fplog,
662 "%d CPUs configured, but only %d of them are online.\n"
663 "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
664 "off to save power, and will turn them back on later when the load increases.\n"
665 "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
666 "will likely see much better performance by forcing all cores to be online, and\n"
667 "making sure they run at their full clock frequency.", ret, sysconf(_SC_NPROCESSORS_ONLN));
669 # endif
670 #elif defined(_SC_NPROC_CONF)
671 ret = sysconf(_SC_NPROC_CONF);
672 #elif defined(_SC_NPROCESSORS_ONLN)
673 ret = sysconf(_SC_NPROCESSORS_ONLN);
674 #elif defined(_SC_NPROC_ONLN)
675 ret = sysconf(_SC_NPROC_ONLN);
676 #else
677 # warning "No valid sysconf argument value found. Executables will not be able to determine the number of logical cores: mdrun will use 1 thread by default!"
678 #endif /* End of check for sysconf argument values */
680 #else
681 /* Neither windows nor Unix. No fscking idea how many hardware threads we have! */
682 ret = -1;
683 #endif
685 if (debug)
687 fprintf(debug, "Detected %d hardware threads to use.\n", ret);
/* Cross-check against OpenMP's own processor count; a mismatch usually
 * means affinity masks or cgroup limits restrict this process. */
690 #ifdef GMX_OPENMP
691 if (ret != gmx_omp_get_num_procs())
693 md_print_warn(cr, fplog,
694 "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
695 "Consider setting the launch configuration manually!",
696 ret, gmx_omp_get_num_procs());
698 #endif
700 return ret;
/* Run GPU detection once per physical node and share the result.
 * With library MPI, only intra-node rank 0 detects and the device table
 * is broadcast to the other ranks on that node; with thread-MPI a single
 * thread executes this. Results land in the global hwinfo_g. */
703 static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr)
705 #ifdef GMX_LIB_MPI
706 int rank_world;
707 MPI_Comm physicalnode_comm;
708 #endif
709 int rank_local;
711 /* Under certain circumstances MPI ranks on the same physical node
712 * can not simultaneously access the same GPU(s). Therefore we run
713 * the detection only on one MPI rank per node and broadcast the info.
714 * Note that with thread-MPI only a single thread runs this code.
716 * TODO: We should also do CPU hardware detection only once on each
717 * physical node and broadcast it, instead of do it on every MPI rank.
719 #ifdef GMX_LIB_MPI
720 /* A split of MPI_COMM_WORLD over physical nodes is only required here,
721 * so we create and destroy it locally.
723 MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
724 MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(),
725 rank_world, &physicalnode_comm);
726 MPI_Comm_rank(physicalnode_comm, &rank_local);
727 #else
728 /* Here there should be only one process, check this */
729 GMX_RELEASE_ASSERT(cr->nnodes == 1 && cr->sim_nodeid == 0, "Only a single (master) process should execute here");
731 rank_local = 0;
732 #endif
734 if (rank_local == 0)
736 char detection_error[STRLEN] = "", sbuf[STRLEN];
/* Detection failure is non-fatal: we warn and fall back to CPU. */
738 if (detect_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
740 if (detection_error[0] != '\0')
742 sprintf(sbuf, ":\n %s\n", detection_error);
744 else
746 sprintf(sbuf, ".");
748 md_print_warn(cr, fplog,
749 "NOTE: Error occurred during GPU detection%s"
750 " Can not use GPU acceleration, will fall back to CPU kernels.\n",
751 sbuf);
755 #ifdef GMX_LIB_MPI
756 /* Broadcast the GPU info to the other ranks within this node */
757 MPI_Bcast(&hwinfo_g->gpu_info.n_dev, 1, MPI_INT, 0, physicalnode_comm);
759 if (hwinfo_g->gpu_info.n_dev > 0)
761 int dev_size;
763 dev_size = hwinfo_g->gpu_info.n_dev*sizeof_gpu_dev_info();
/* Non-root ranks allocate the receive buffer for the device table.
 * NOTE(review): plain malloc() here, while the rest of the file uses
 * snew(); the return value is not checked -- presumably intentional
 * to match a matching free elsewhere, TODO confirm ownership. */
765 if (rank_local > 0)
767 hwinfo_g->gpu_info.gpu_dev =
768 (struct gmx_device_info_t *)malloc(dev_size);
770 MPI_Bcast(hwinfo_g->gpu_info.gpu_dev, dev_size, MPI_BYTE,
771 0, physicalnode_comm);
772 MPI_Bcast(&hwinfo_g->gpu_info.n_dev_compatible, 1, MPI_INT,
773 0, physicalnode_comm);
776 MPI_Comm_free(&physicalnode_comm);
777 #endif
/* Reduce per-node hardware counts (cores, hw threads, compatible GPUs,
 * suggested SIMD level, GPU-type hash) over all MPI ranks and store the
 * totals and min/max per node in the global hwinfo_g. Without library
 * MPI there is one node and the local values are copied directly. */
780 static void gmx_collect_hardware_mpi(const gmx::CpuInfo &cpuInfo)
782 #ifdef GMX_LIB_MPI
783 int rank_id;
784 int nrank, rank, ncore, nhwthread, ngpu, i;
785 int gpu_hash;
786 int *buf, *all;
788 rank_id = gmx_physicalnode_id_hash();
789 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
790 MPI_Comm_size(MPI_COMM_WORLD, &nrank);
791 ncore = hwinfo_g->ncore;
792 nhwthread = hwinfo_g->nthreads_hw_avail;
793 ngpu = hwinfo_g->gpu_info.n_dev_compatible;
794 /* Create a unique hash of the GPU type(s) in this node */
795 gpu_hash = 0;
796 /* Here it might be better to only loop over the compatible GPU, but we
797 * don't have that information available and it would also require
798 * removing the device ID from the device info string.
800 for (i = 0; i < hwinfo_g->gpu_info.n_dev; i++)
802 char stmp[STRLEN];
804 /* Since the device ID is incorporated in the hash, the order of
805 * the GPUs affects the hash. Also two identical GPUs won't give
806 * a gpu_hash of zero after XORing.
808 get_gpu_device_info_string(stmp, &hwinfo_g->gpu_info, i);
809 gpu_hash ^= gmx_string_fullhash_func(stmp, gmx_string_hash_init);
/* Gather all ranks' physical-node hashes so each rank can work out
 * whether it is the first rank on its node. */
812 snew(buf, nrank);
813 snew(all, nrank);
814 buf[rank] = rank_id;
816 MPI_Allreduce(buf, all, nrank, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
818 gmx_bool bFound;
819 int nnode0, ncore0, nhwthread0, ngpu0, r;
/* Only the first rank on each node contributes non-zero values to the
 * per-node sums, so nodes are counted once regardless of rank count. */
821 bFound = FALSE;
822 ncore0 = 0;
823 nnode0 = 0;
824 nhwthread0 = 0;
825 ngpu0 = 0;
826 for (r = 0; r < nrank; r++)
828 if (all[r] == rank_id)
830 if (!bFound && r == rank)
832 /* We are the first rank in this physical node */
833 nnode0 = 1;
834 ncore0 = ncore;
835 nhwthread0 = nhwthread;
836 ngpu0 = ngpu;
838 bFound = TRUE;
842 sfree(buf);
843 sfree(all);
845 int sum[4], maxmin[10];
848 int buf[4];
850 /* Sum values from only intra-rank 0 so we get the sum over all nodes */
851 buf[0] = nnode0;
852 buf[1] = ncore0;
853 buf[2] = nhwthread0;
854 buf[3] = ngpu0;
856 MPI_Allreduce(buf, sum, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
860 int buf[10];
862 /* Store + and - values for all ranks,
863 * so we can get max+min with one MPI call.
865 buf[0] = ncore;
866 buf[1] = nhwthread;
867 buf[2] = ngpu;
868 buf[3] = static_cast<int>(gmx::simdSuggested(cpuInfo));
869 buf[4] = gpu_hash;
870 buf[5] = -buf[0];
871 buf[6] = -buf[1];
872 buf[7] = -buf[2];
873 buf[8] = -buf[3];
874 buf[9] = -buf[4];
876 MPI_Allreduce(buf, maxmin, 10, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
/* maxmin[0..4] hold the maxima, -maxmin[5..9] the minima of the
 * corresponding buf entries. Identical GPUs everywhere iff the max and
 * min GPU hashes agree. */
879 hwinfo_g->nphysicalnode = sum[0];
880 hwinfo_g->ncore_tot = sum[1];
881 hwinfo_g->ncore_min = -maxmin[5];
882 hwinfo_g->ncore_max = maxmin[0];
883 hwinfo_g->nhwthread_tot = sum[2];
884 hwinfo_g->nhwthread_min = -maxmin[6];
885 hwinfo_g->nhwthread_max = maxmin[1];
886 hwinfo_g->ngpu_compatible_tot = sum[3];
887 hwinfo_g->ngpu_compatible_min = -maxmin[7];
888 hwinfo_g->ngpu_compatible_max = maxmin[2];
889 hwinfo_g->simd_suggest_min = -maxmin[8];
890 hwinfo_g->simd_suggest_max = maxmin[3];
891 hwinfo_g->bIdenticalGPUs = (maxmin[4] == -maxmin[9]);
892 #else
893 /* All ranks use the same pointer, protect it with a mutex */
894 tMPI_Thread_mutex_lock(&hw_info_lock);
895 hwinfo_g->nphysicalnode = 1;
896 hwinfo_g->ncore_tot = hwinfo_g->ncore;
897 hwinfo_g->ncore_min = hwinfo_g->ncore;
898 hwinfo_g->ncore_max = hwinfo_g->ncore;
899 hwinfo_g->nhwthread_tot = hwinfo_g->nthreads_hw_avail;
900 hwinfo_g->nhwthread_min = hwinfo_g->nthreads_hw_avail;
901 hwinfo_g->nhwthread_max = hwinfo_g->nthreads_hw_avail;
902 hwinfo_g->ngpu_compatible_tot = hwinfo_g->gpu_info.n_dev_compatible;
903 hwinfo_g->ngpu_compatible_min = hwinfo_g->gpu_info.n_dev_compatible;
904 hwinfo_g->ngpu_compatible_max = hwinfo_g->gpu_info.n_dev_compatible;
905 hwinfo_g->simd_suggest_min = static_cast<int>(simdSuggested(cpuInfo));
906 hwinfo_g->simd_suggest_max = static_cast<int>(simdSuggested(cpuInfo));
907 hwinfo_g->bIdenticalGPUs = TRUE;
908 tMPI_Thread_mutex_unlock(&hw_info_lock);
909 #endif
/* Detect CPU, topology and (optionally) GPUs, returning the global
 * reference-counted hwinfo_g. The mutex serializes concurrent callers;
 * only the first caller performs detection (n_hwinfo == 0). */
912 gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
913 gmx_bool bDetectGPUs)
915 int ret;
917 /* make sure no one else is doing the same thing */
918 ret = tMPI_Thread_mutex_lock(&hw_info_lock);
919 if (ret != 0)
921 gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
924 /* only initialize the hwinfo structure if it is not already initalized */
925 if (n_hwinfo == 0)
927 snew(hwinfo_g, 1);
929 hwinfo_g->cpuInfo = new gmx::CpuInfo(gmx::CpuInfo::detect());
930 hwinfo_g->hardwareTopology = new gmx::HardwareTopology(gmx::HardwareTopology::detect());
932 /* get the number of cores, will be 0 when not detected */
933 hwinfo_g->ncore = get_ncores(*hwinfo_g->hardwareTopology);
935 /* detect number of hardware threads */
936 hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
938 /* detect GPUs */
939 hwinfo_g->gpu_info.n_dev = 0;
940 hwinfo_g->gpu_info.n_dev_compatible = 0;
941 hwinfo_g->gpu_info.gpu_dev = NULL;
943 /* Run the detection if the binary was compiled with GPU support
944 * and we requested detection.
946 hwinfo_g->gpu_info.bDetectGPUs =
947 (bGPUBinary && bDetectGPUs &&
948 getenv("GMX_DISABLE_GPU_DETECTION") == NULL);
949 if (hwinfo_g->gpu_info.bDetectGPUs)
951 gmx_detect_gpus(fplog, cr);
954 /* increase the reference counter */
955 n_hwinfo++;
957 ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
958 if (ret != 0)
960 gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
/* The MPI reduction runs outside the mutex on every call; it is a
 * collective operation, so all ranks must reach it. */
963 gmx_collect_hardware_mpi(*hwinfo_g->cpuInfo);
965 return hwinfo_g;
/* Build the multi-line "Running on ... / Hardware detected" report shown
 * in the log and on stderr. bFullCpuInfo additionally includes CPU
 * family/model/stepping and the feature list. */
968 static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
969 bool bFullCpuInfo)
971 std::string s;
/* NOTE(review): this reads the global hwinfo_g rather than the hwinfo
 * parameter; they refer to the same object today, but using the
 * parameter would be consistent with the signature -- TODO confirm. */
973 const gmx::CpuInfo &cpuInfo = *hwinfo_g->cpuInfo;
975 s = gmx::formatString("\n");
976 s += gmx::formatString("Running on %d node%s with total",
977 hwinfo->nphysicalnode,
978 hwinfo->nphysicalnode == 1 ? "" : "s");
979 if (hwinfo->ncore_tot > 0)
981 s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
983 s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
984 if (hwinfo->gpu_info.bDetectGPUs)
986 s += gmx::formatString(", %d compatible GPU%s",
987 hwinfo->ngpu_compatible_tot,
988 hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
990 else if (bGPUBinary)
992 s += gmx::formatString(" (GPU detection deactivated)");
994 s += gmx::formatString("\n");
/* With several physical nodes also report the per-node min-max ranges */
996 if (hwinfo->nphysicalnode > 1)
998 /* Print per node hardware feature counts */
999 if (hwinfo->ncore_max > 0)
1001 s += gmx::formatString(" Cores per node: %2d", hwinfo->ncore_min);
1002 if (hwinfo->ncore_max > hwinfo->ncore_min)
1004 s += gmx::formatString(" - %2d", hwinfo->ncore_max);
1006 s += gmx::formatString("\n");
1008 s += gmx::formatString(" Logical cores per node: %2d", hwinfo->nhwthread_min);
1009 if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
1011 s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
1013 s += gmx::formatString("\n");
1014 if (bGPUBinary)
1016 s += gmx::formatString(" Compatible GPUs per node: %2d",
1017 hwinfo->ngpu_compatible_min);
1018 if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
1020 s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
1022 s += gmx::formatString("\n");
1023 if (hwinfo->ngpu_compatible_tot > 0)
1025 if (hwinfo->bIdenticalGPUs)
1027 s += gmx::formatString(" All nodes have identical type(s) of GPUs\n");
1029 else
1031 /* This message will also appear with identical GPU types
1032 * when at least one node has no GPU.
1034 s += gmx::formatString(" Different nodes have different type(s) and/or order of GPUs\n");
1040 #ifdef GMX_LIB_MPI
1041 char host[HOSTNAMELEN];
1042 int rank;
1044 gmx_gethostname(host, HOSTNAMELEN);
1045 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1047 s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
1048 host, rank);
1049 #else
1050 s += gmx::formatString("Hardware detected:\n");
1051 #endif
1052 s += gmx::formatString(" CPU info:\n");
1054 s += gmx::formatString(" Vendor: %s\n", cpuInfo.vendorString().c_str());
1056 s += gmx::formatString(" Brand: %s\n", cpuInfo.brandString().c_str());
1058 if (bFullCpuInfo)
1060 s += gmx::formatString(" Family: %d Model: %d Stepping: %d\n",
1061 cpuInfo.family(), cpuInfo.model(), cpuInfo.stepping());
1063 s += gmx::formatString(" Features:");
1064 for (auto &f : cpuInfo.featureSet())
1066 s += gmx::formatString(" %s", cpuInfo.featureString(f).c_str());;
1068 s += gmx::formatString("\n");
1071 s += gmx::formatString(" SIMD instructions most likely to fit this hardware: %s",
1072 gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min)).c_str());
1074 if (hwinfo->simd_suggest_max > hwinfo->simd_suggest_min)
1076 s += gmx::formatString(" - %s", gmx::simdString(static_cast<gmx::SimdType>(hwinfo->simd_suggest_max)).c_str());
1078 s += gmx::formatString("\n");
1080 s += gmx::formatString(" SIMD instructions selected at GROMACS compile time: %s\n",
1081 gmx::simdString(gmx::simdCompiled()).c_str());
1083 if (bGPUBinary && (hwinfo->ngpu_compatible_tot > 0 ||
1084 hwinfo->gpu_info.n_dev > 0))
1086 s += gmx::formatString(" GPU info:\n");
1087 s += gmx::formatString(" Number of GPUs detected: %d\n",
1088 hwinfo->gpu_info.n_dev);
1089 if (hwinfo->gpu_info.n_dev > 0)
1091 char buf[STRLEN];
1093 sprint_gpus(buf, &hwinfo->gpu_info);
1094 s += gmx::formatString("%s\n", buf);
1097 return s;
1100 void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
1101 const gmx_hw_info_t *hwinfo)
1103 const gmx::CpuInfo &cpuInfo = *hwinfo_g->cpuInfo;
1105 if (fplog != NULL)
1107 std::string detected;
1109 detected = detected_hardware_string(hwinfo, TRUE);
1111 fprintf(fplog, "%s\n", detected.c_str());
1114 if (MULTIMASTER(cr))
1116 std::string detected;
1118 detected = detected_hardware_string(hwinfo, FALSE);
1120 fprintf(stderr, "%s\n", detected.c_str());
1123 /* Check the compiled SIMD instruction set against that of the node
1124 * with the lowest SIMD level support (skip if SIMD detection did not work)
1126 if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
1128 gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, MULTIMASTER(cr));
1131 /* For RDTSCP we only check on our local node and skip the MPI reduction */
1132 check_use_of_rdtscp_on_this_cpu(fplog, cr, cpuInfo);
1135 //! \brief Return if any GPU ID (e.g in a user-supplied string) is repeated
1136 static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
1138 /* Loop over IDs in the string */
1139 for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
1141 /* Look for the ID in location i in the following part of the
1142 string */
1143 for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
1145 if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
1147 /* Same ID found in locations i and j */
1148 return TRUE;
1153 return FALSE;
1156 void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
1158 char *env;
1160 if (gpu_opt->gpu_id != NULL && !bGPUBinary)
1162 gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!",
1163 gmx::getProgramContext().displayName());
1166 env = getenv("GMX_GPU_ID");
1167 if (env != NULL && gpu_opt->gpu_id != NULL)
1169 gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
1171 if (env == NULL)
1173 env = gpu_opt->gpu_id;
1176 /* parse GPU IDs if the user passed any */
1177 if (env != NULL)
1179 /* Parse a "plain" or comma-separated GPU ID string which contains a
1180 * sequence of digits corresponding to GPU IDs; the order will
1181 * indicate the process/tMPI thread - GPU assignment. */
1182 parse_digits_from_string(env, &gpu_opt->n_dev_use, &gpu_opt->dev_use);
1184 if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
1186 gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", getGpuImplementationString());
1188 if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
1190 gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", getGpuImplementationString());
1192 if (gpu_opt->n_dev_use == 0)
1194 gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
1195 invalid_gpuid_hint);
1198 gpu_opt->bUserSet = TRUE;
/*! \brief Validate user-selected GPU IDs, or automatically select GPUs.
 *
 * Must be called after gmx_parse_gpu_ids. Non-PP ranks return early and
 * use no GPU. All failure modes terminate via gmx_fatal.
 *
 * \param[in]  fplog        Log file, may be NULL.
 * \param[in]  cr           Communication record of this rank.
 * \param[in]  gpu_info     Detected GPU information.
 * \param[in]  bForceUseGPU Whether GPU use was explicitly requested (-nb gpu).
 * \param[in,out] gpu_opt   GPU options; dev_use/n_dev_use are filled in.
 */
void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
                        const gmx_gpu_info_t *gpu_info,
                        gmx_bool bForceUseGPU,
                        gmx_gpu_opt_t *gpu_opt)
{
    int i;
    char sbuf[STRLEN], stmp[STRLEN];

    /* Bail if binary is not compiled with GPU acceleration, but this is either
     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
    if (bForceUseGPU && !bGPUBinary)
    {
        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!",
                  gmx::getProgramContext().displayName());
    }

    if (!(cr->duty & DUTY_PP))
    {
        /* Our rank is not doing PP, we don't use a GPU */
        return;
    }

    if (gpu_opt->bUserSet)
    {
        /* Check the GPU IDs passed by the user.
         * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
         */
        int *checkres;
        int res;

        snew(checkres, gpu_opt->n_dev_use);

        /* checkres[i] gets a per-device status code (egpu*) for each
         * requested device; res is overall success. */
        res = check_selected_gpus(checkres, gpu_info, gpu_opt);

        if (!res)
        {
            print_gpu_detection_stats(fplog, gpu_info, cr);

            /* Build one combined message listing every bad device.
             * NOTE(review): sbuf/stmp are fixed STRLEN buffers filled with
             * sprintf/strcat — assumes the list fits; confirm for large
             * device counts. */
            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
            for (i = 0; i < gpu_opt->n_dev_use; i++)
            {
                if (checkres[i] != egpuCompatible)
                {
                    sprintf(stmp, " GPU #%d: %s\n",
                            gpu_opt->dev_use[i],
                            gpu_detect_res_str[checkres[i]]);
                    strcat(sbuf, stmp);
                }
            }
            gmx_fatal(FARGS, "%s", sbuf);
        }

        sfree(checkres);
    }
    else if (getenv("GMX_EMULATE_GPU") == NULL)
    {
        /* Automatic selection: pick compatible GPUs and assign them to the
         * PP ranks within this physical node.
         * NOTE(review): uses the global hwinfo_g (non-const) rather than the
         * const gpu_info parameter — presumably they refer to the same data;
         * verify. */
        pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
        set_gpu_ids(gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
    }

    /* If the user asked for a GPU, check whether we have a GPU */
    if (bForceUseGPU && gpu_info->n_dev_compatible == 0)
    {
        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
    }
}
1269 /* Select the GPUs we will use. This is an operation local to each physical
1270 * node. If we have less MPI ranks than GPUs, we will waste some GPUs.
1271 * nrank and rank are the rank count and id for PP processes in our node.
1273 static void set_gpu_ids(gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
1275 GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
1276 GMX_RELEASE_ASSERT(nrank >= 1,
1277 gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
1278 rank, gpu_opt->n_dev_compatible).c_str());
1280 if (gpu_opt->n_dev_compatible == 0)
1282 char host[HOSTNAMELEN];
1284 gmx_gethostname(host, HOSTNAMELEN);
1285 gmx_fatal(FARGS, "A GPU was requested on host %s, but no compatible GPUs were detected. All nodes with PP ranks need to have GPUs. If you intended to use GPU acceleration in a parallel run, you can either avoid using the nodes that don't have GPUs or place PME ranks on these nodes.", host);
1288 int nshare;
1290 nshare = 1;
1291 if (nrank > gpu_opt->n_dev_compatible)
1293 if (nrank % gpu_opt->n_dev_compatible == 0)
1295 nshare = gmx_gpu_sharing_supported() ? nrank/gpu_opt->n_dev_compatible : 1;
1297 else
1299 if (rank == 0)
1301 gmx_fatal(FARGS, "The number of MPI ranks (%d) in a physical node is not a multiple of the number of GPUs (%d). Select a different number of MPI ranks or use the -gpu_id option to manually specify the GPU to be used.",
1302 nrank, gpu_opt->n_dev_compatible);
1305 #ifdef GMX_MPI
1306 /* We use a global barrier to prevent ranks from continuing with
1307 * an invalid setup.
1309 MPI_Barrier(MPI_COMM_WORLD);
1310 #endif
1314 /* Here we will waste GPUs when nrank < gpu_opt->n_dev_compatible */
1315 gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_compatible*nshare, nrank);
1316 if (!gmx_multiple_gpu_per_node_supported())
1318 gpu_opt->n_dev_use = std::min(gpu_opt->n_dev_use, 1);
1320 snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
1321 for (int i = 0; i != gpu_opt->n_dev_use; ++i)
1323 /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
1324 gpu_opt->dev_use[i] = gpu_opt->dev_compatible[i/nshare];
1328 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
1330 int ret;
1332 ret = tMPI_Thread_mutex_lock(&hw_info_lock);
1333 if (ret != 0)
1335 gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
1338 /* decrease the reference counter */
1339 n_hwinfo--;
1342 if (hwinfo != hwinfo_g)
1344 gmx_incons("hwinfo < hwinfo_g");
1347 if (n_hwinfo < 0)
1349 gmx_incons("n_hwinfo < 0");
1352 if (n_hwinfo == 0)
1354 delete hwinfo_g->cpuInfo;
1355 delete hwinfo_g->hardwareTopology;
1356 free_gpu_info(&hwinfo_g->gpu_info);
1357 sfree(hwinfo_g);
1360 ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
1361 if (ret != 0)
1363 gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));