1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of GROMACS.
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
19 * GROup of MAchos and Cynical Suckers
29 #include "types/enums.h"
30 #include "types/hw_info.h"
31 #include "types/commrec.h"
32 #include "gmx_fatal.h"
33 #include "gmx_fatal_collective.h"
35 #include "gpu_utils.h"
37 #include "gmx_detect_hardware.h"
39 #include "md_logging.h"
41 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
/* Although the user can not pass more than 10 different GPU IDs, as the
 * IDs are assumed to be represented by single digits, multiple processes
 * can share a GPU, so we can end up with more than 10 IDs in use.
 * To account for potential extreme cases we'll set the limit to a pretty
 * ridiculous number. */
/* Hard upper limit on the number of GPU IDs accepted from the user. */
static unsigned int max_gpu_ids_user = 64;

/* Caps the number of GPUs in use (gpu_info.ncuda_dev_use) at 'count';
 * defined later in this file. */
void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
/* Format a human-readable description of the detected CUDA devices into
 * sbuf, one device per line (via get_gpu_device_info_string).
 * NOTE(review): bPrintAll presumably selects between listing all devices
 * vs. only compatible ones -- the filtering logic is not visible in this
 * view; confirm against the full implementation.
 */
static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)

    /* number of CUDA devices detected on this node */
    ndev = gpu_info->ncuda_dev;

    for (i = 0; i < ndev; i++)

        /* write the description of device i into a temporary buffer;
         * the accumulation into sbuf is elided in this view */
        get_gpu_device_info_string(stmp, gpu_info, i);
/* Print the GPU detection results (device list, or a "none detected" note)
 * to the log and as a console note via md_print_warn. With real (non-thread)
 * MPI the hostname is appended so output from different physical nodes can
 * be told apart.
 */
static void print_gpu_detection_stats(FILE *fplog,
                                      const gmx_gpu_info_t *gpu_info,

    /* " on host " (9 chars) + hostname (up to 256 chars) + terminator */
    char onhost[266], stmp[STRLEN];

    ngpu = gpu_info->ncuda_dev;

#if defined GMX_MPI && !defined GMX_THREAD_MPI
    /* We only print the detection on one, of possibly multiple, nodes */
    strncpy(onhost, " on host ", 10);
    gmx_gethostname(onhost + 9, 256);
    /* NOTE(review): the #else branch that should initialize onhost for
     * non-MPI builds (and the matching #endif) is not visible in this
     * view -- verify onhost is always initialized before use. */

    /* We detect all relevant GPUs */
    sprint_gpus(stmp, gpu_info, TRUE);
    md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
                  ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);

    /* no-GPUs-found branch (its condition is elided in this view) */
    md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
/* Report which GPUs are selected for the run: a note if compatible GPUs
 * were detected but none will be used, otherwise the list of user- or
 * auto-selected device IDs.
 */
static void print_gpu_use_stats(FILE *fplog,
                                const gmx_gpu_info_t *gpu_info,

    char sbuf[STRLEN], stmp[STRLEN];
    int i, ngpu, ngpu_all;

    /* devices selected for use vs. all compatible devices detected */
    ngpu     = gpu_info->ncuda_dev_use;
    ngpu_all = gpu_info->ncuda_dev;

    /* Issue note if GPUs are available but not used */
    if (ngpu_all > 0 && ngpu < 1)

        /* (the enclosing md_print_* call site is elided in this view) */
        "%d compatible GPU%s detected in the system, but none will be used.\n"
        "Consider trying GPU acceleration with the Verlet scheme!",
        ngpu_all, (ngpu_all > 1) ? "s" : "");

    sprintf(sbuf, "%d GPU%s %sselected to be used for this run: ",
            ngpu, (ngpu > 1) ? "s" : "",
            gpu_info->bUserSet ? "user-" : "auto-");
    for (i = 0; i < ngpu; i++)

        /* format "#<id>" for each selected device (the append to sbuf
         * is elided in this view) */
        sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));

    md_print_info(cr, fplog, "%s\n\n", sbuf);
141 /* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
142 * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
143 static void parse_gpu_id_plain_string(const char *idstr
, int *nid
, int *idlist
)
148 len_idstr
= strlen(idstr
);
150 if (len_idstr
> max_gpu_ids_user
)
152 gmx_fatal(FARGS
,"%d GPU IDs provided, but only at most %d are supported",
153 len_idstr
, max_gpu_ids_user
);
158 for (i
= 0; i
< *nid
; i
++)
160 if (idstr
[i
] < '0' || idstr
[i
] > '9')
162 gmx_fatal(FARGS
, "Invalid character in GPU ID string: '%c'\n", idstr
[i
]);
164 idlist
[i
] = idstr
[i
] - '0';
/* Parse a comma-separated GPU ID string; placeholder for supporting more
 * than 10 distinct GPUs per node (the plain single-digit format can't). */
static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)

    /* XXX implement csv format to support more than 10 different GPUs in a box. */
    gmx_incons("Not implemented yet");
/* Consistency-check the run configuration (MPI/thread-MPI rank counts,
 * requested GPU use) against the detected hardware. Prints notes/warnings
 * for sub-optimal setups and aborts (gmx_fatal / gmx_fatal_collective)
 * for configurations that can not work: too few auto-started tMPI threads
 * for the user-assigned GPUs, mismatching PP rank vs. GPU counts, or GPU
 * sharing among thread-MPI threads.
 * NOTE(review): large parts of the control flow (braces, several branch
 * conditions and some gmx_fatal/md_print_warn call sites) are elided in
 * this view; the comments below describe only what is visible.
 */
void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
                                      const t_commrec *cr, int ntmpi_requested,

    int npppn, ntmpi_pp, ngpu;
    char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
    gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;

    btMPI = bMPI = FALSE;
    bNthreadsAuto = FALSE;
#if defined(GMX_THREAD_MPI)
    /* thread-MPI build: the thread count is auto-set when < 1 is requested */
    bNthreadsAuto = (ntmpi_requested < 1);
#elif defined(GMX_LIB_MPI)

    /* GPU emulation detection is done later, but we need here as well
     * -- uncool, but there's no elegant workaround */
    bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
    bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);

    /* check the acceleration mdrun is compiled with against hardware capabilities */
    /* TODO: Here we assume homogeneous hardware which is not necessarily the case!
     * Might not hurt to add an extra check over MPI. */
    gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);

    /* Below we only do consistency checks for PP and GPUs,
     * this is irrelevant for PME only nodes, so in that case we return here. */
    if (!(cr->duty & DUTY_PP))

    /* Need to ensure that we have enough GPUs:
     * - need one GPU per PP node
     * - no GPU oversubscription with tMPI
     * => keep on the GPU support, otherwise turn off (or bail if forced) */
    /* number of PP processes per node */
    npppn = cr->nnodes_pp_intra;

    /* Build the "thread-MPI thread(s)"/"MPI process(es) per node"/"process"
     * wording used by all the messages below; which sprintf runs depends on
     * the (elided) btMPI/bMPI branches. */
    th_or_proc_plural[0] = '\0';
    sprintf(th_or_proc, "thread-MPI thread");
    sprintf(th_or_proc_plural, "s");
    sprintf(th_or_proc, "MPI process");
    sprintf(th_or_proc_plural, "es");
    sprintf(pernode, " per node");
    /* neither MPI nor tMPI */
    sprintf(th_or_proc, "process");

    print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);

    if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)

        ngpu = hwinfo->gpu_info.ncuda_dev_use;
        sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");

        /* number of tMPI threads auto-adjusted */
        if (btMPI && bNthreadsAuto && SIMMASTER(cr))

            if (hwinfo->gpu_info.bUserSet)

                /* The user manually provided more GPUs than threads we could
                 * automatically start. */
                /* NOTE(review): user-visible typos in the messages below:
                 * "coud" -> "could", "tread-MPI" -> "thread-MPI". */
                "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
                "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
                ngpu, gpu_plural, npppn, th_or_proc_plural,
                ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");

                /* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
                md_print_warn(cr, fplog,
                              "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
                              "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
                              ngpu, gpu_plural, npppn, th_or_proc_plural,
                              ShortProgram(), npppn, npppn > 1 ? "s" : "",
                              bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");

                /* only the intra-node master trims the GPU list */
                if (cr->nodeid_intra == 0)

                    limit_num_gpus_used(hwinfo, npppn);
                    /* re-read the (possibly reduced) count and pluralization */
                    ngpu = hwinfo->gpu_info.ncuda_dev_use;
                    sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");

        if (hwinfo->gpu_info.bUserSet)

            /* user-set GPU count must match the PP rank count exactly
             * (the enclosing gmx_fatal call site is elided in this view) */
            "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
            "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
            th_or_proc, btMPI ? "s" : "es", pernode,
            ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);

            /* auto-selected GPUs: more GPUs than PP ranks is only sub-optimal */
            md_print_warn(cr, fplog,
                          "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
                          "      PP %s%s%s than GPU%s available.\n"
                          "      Each PP %s can only use one GPU, so only %d GPU%s%s will be used.",
                          th_or_proc, th_or_proc_plural, pernode, gpu_plural,
                          th_or_proc, npppn, gpu_plural, pernode);

            if (bMPI || (btMPI && cr->nodeid_intra == 0))

                limit_num_gpus_used(hwinfo, npppn);
                ngpu = hwinfo->gpu_info.ncuda_dev_use;
                sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");

        /* Avoid duplicate error messages.
         * Unfortunately we can only do this at the physical node
         * level, since the hardware setup and MPI process count
         * might be differ over physical nodes. */
        if (cr->nodeid_intra == 0)

            /* (the enclosing gmx_fatal call site is elided in this view) */
            "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
            "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
            th_or_proc, btMPI ? "s" : "es", pernode,
            ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);

        /* Avoid other ranks to continue after inconsistency */
        MPI_Barrier(cr->mpi_comm_mygroup);

        if (hwinfo->gpu_info.bUserSet && (cr->nodeid_intra == 0))

            int i, j, same_count;
            gmx_bool bSomeSame, bAllDifferent;

            /* NOTE(review): initializations of bSomeSame and same_count are
             * not visible in this view; they must be FALSE/0 before the
             * loops below -- verify against the full source. */
            bAllDifferent = TRUE;

            /* pairwise comparison of all user-assigned device IDs */
            for (i = 0; i < ngpu - 1; i++)

                for (j = i + 1; j < ngpu; j++)

                    bSomeSame     |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
                    bAllDifferent &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
                    same_count    += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];

            /* GPU sharing among thread-MPI threads is not supported */
            if (btMPI && !bAllDifferent)

                /* (the enclosing gmx_fatal call site is elided in this view) */
                "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
                "Use MPI if you are sure that you want to assign GPU to multiple threads.");

                /* sharing is legal with real MPI, but usually slow */
                md_print_warn(cr, fplog,
                              "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
                              "      multiple %s%s; this should be avoided as it generally\n"
                              "      causes performance loss.",
                              same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");

    print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
/* Return the number of hardware threads supported by the current CPU.
 * We assume that this is equal with the number of CPUs reported to be
 * online by the OS at the time of the call.
 * Uses GetSystemInfo on Windows and the first available sysconf()
 * processor-count argument on Unix-like systems.
 */
static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)

#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
    /* Windows: logical processor count from the system info block */
    GetSystemInfo(&sysinfo);
    ret = sysinfo.dwNumberOfProcessors;
#elif defined HAVE_SYSCONF
    /* We are probably on Unix.
     * Now check if we have the argument to use before executing the call */
#if defined(_SC_NPROCESSORS_ONLN)
    ret = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
    ret = sysconf(_SC_NPROC_ONLN);
#elif defined(_SC_NPROCESSORS_CONF)
    ret = sysconf(_SC_NPROCESSORS_CONF);
#elif defined(_SC_NPROC_CONF)
    ret = sysconf(_SC_NPROC_CONF);
#endif /* End of check for sysconf argument values */

    /* Neither windows nor Unix. No fscking idea how many CPUs we have! */

    /* debug-only trace of the detection result (guard elided in this view) */
    fprintf(debug, "Detected %d processors, will use this as the number "
            "of supported hardware threads.\n", ret);

    /* warn when the OS and OpenMP disagree on the CPU count */
    if (ret != gmx_omp_get_num_procs())

        md_print_warn(cr, fplog,
                      "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
                      "Consider setting the launch configuration manually!",
                      ret, gmx_omp_get_num_procs());
/* Detect the hardware: CPUID info, the hardware thread count, and (when
 * built with GPU support and detection is not disabled) the CUDA devices,
 * filling hwinfo accordingly. GPU IDs can be supplied via the GMX_GPU_ID
 * environment variable or the -gpu_id option (mutually exclusive);
 * otherwise compatible GPUs are auto-selected. Fatal error if GPU use was
 * forced but is impossible.
 * NOTE(review): several branch conditions and braces are elided in this
 * view; comments describe only what is visible.
 */
void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
                         gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,

    char sbuf[STRLEN], stmp[STRLEN];
    gmx_gpu_info_t gpuinfo_auto, gpuinfo_user;

    /* detect CPUID info; no fuss, we don't detect system-wide
     * -- sloppy, but that's it for now */
    if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)

        gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");

    /* detect number of hardware threads */
    hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);

    /* start from a clean GPU-info slate */
    hwinfo->gpu_info.ncuda_dev_use = 0;
    hwinfo->gpu_info.cuda_dev_use  = NULL;
    hwinfo->gpu_info.ncuda_dev     = 0;
    hwinfo->gpu_info.cuda_dev      = NULL;

    /* Bail if binary is not compiled with GPU on */
    if (bForceUseGPU && !bGPUBin)

        gmx_fatal_collective(FARGS, cr, NULL, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());

    /* run the detection if the binary was compiled with GPU support */
    if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION")==NULL)

        detect_cuda_gpus(&hwinfo->gpu_info);

    if (bForceUseGPU || bTryUseGPU)

        /* the env var and the command-line option are mutually exclusive */
        env = getenv("GMX_GPU_ID");
        if (env != NULL && gpu_id != NULL)

            gmx_fatal(FARGS,"GMX_GPU_ID and -gpu_id can not be used at the same time");

        /* parse GPU IDs if the user passed any */

            int *gpuid, *checkres;

            /* one slot per possible user-provided ID */
            snew(gpuid, max_gpu_ids_user);
            snew(checkres, max_gpu_ids_user);

            parse_gpu_id_plain_string(env, &nid, gpuid);

            /* (the empty-string check guarding this is elided in this view) */
            gmx_fatal(FARGS, "Empty GPU ID string passed\n");

            /* validate the requested devices against the detected ones */
            res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);

            print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);

            /* collect the per-device failure reasons into one fatal message */
            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
            for (i = 0; i < nid; i++)

                if (checkres[i] != egpuCompatible)

                    sprintf(stmp, "    GPU #%d: %s\n",
                            gpuid[i], gpu_detect_res_str[checkres[i]]);

            gmx_fatal(FARGS, "%s", sbuf);

            hwinfo->gpu_info.bUserSet = TRUE;

            /* no user-supplied IDs: auto-select compatible GPUs */
            pick_compatible_gpus(&hwinfo->gpu_info);
            hwinfo->gpu_info.bUserSet = FALSE;

    /* decide whether we can use GPU */
    hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
    if (!hwinfo->bCanUseGPU && bForceUseGPU)

        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
/* Limit the number of GPUs used to 'count' by truncating the in-use count.
 * The visible code only ever decreases ncuda_dev_use; the error path for a
 * count < 1 (its condition and the gmx_incons/fatal call) is elided in
 * this view -- TODO confirm against the full source.
 */
void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)

    ndev_use = hwinfo->gpu_info.ncuda_dev_use;

    if (count > ndev_use)

        /* won't increase the # of GPUs */

    /* (count < 1 error path; remaining message arguments elided in this view) */
    sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",

    /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
    hwinfo->gpu_info.ncuda_dev_use = count;
/* Release the resources held inside hwinfo: the CPUID info and the GPU
 * info. NOTE(review): no deallocation of hwinfo itself is visible here --
 * presumably the caller owns the struct; confirm against the full source.
 */
void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)

    gmx_cpuid_done(hwinfo->cpuid_info);
    free_gpu_info(&hwinfo->gpu_info);