added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / gmxlib / gmx_detect_hardware.c
blob6fe77d759ef9f8e3a9f5fd847b4c53172daafac3
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of GROMACS.
5 * Copyright (c) 2012-
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
18 * And Hey:
19 * GROup of MAchos and Cynical Suckers
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <string.h>
29 #include "types/enums.h"
30 #include "types/hw_info.h"
31 #include "types/commrec.h"
32 #include "gmx_fatal.h"
33 #include "gmx_fatal_collective.h"
34 #include "smalloc.h"
35 #include "gpu_utils.h"
36 #include "statutil.h"
37 #include "gmx_detect_hardware.h"
38 #include "main.h"
39 #include "md_logging.h"
41 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
42 #include "windows.h"
43 #endif
/* Upper bound on the number of GPU IDs accepted from the user.
 * Each ID is assumed to be a single digit, so at most 10 different GPUs
 * can be named; however, multiple processes can share a GPU, so the ID
 * list itself can contain more than 10 entries.
 * To account for potential extreme cases the limit is set to a
 * deliberately generous number. */
static unsigned int max_gpu_ids_user = 64;
52 /* FW decl. */
53 void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count);
55 static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info, gmx_bool bPrintAll)
57 int i, ndev;
58 char stmp[STRLEN];
60 ndev = gpu_info->ncuda_dev;
62 sbuf[0] = '\0';
63 for (i = 0; i < ndev; i++)
65 get_gpu_device_info_string(stmp, gpu_info, i);
66 strcat(sbuf, " ");
67 strcat(sbuf, stmp);
68 if (i < ndev - 1)
70 strcat(sbuf, "\n");
75 static void print_gpu_detection_stats(FILE *fplog,
76 const gmx_gpu_info_t *gpu_info,
77 const t_commrec *cr)
79 char onhost[266],stmp[STRLEN];
80 int ngpu;
82 ngpu = gpu_info->ncuda_dev;
84 #if defined GMX_MPI && !defined GMX_THREAD_MPI
85 /* We only print the detection on one, of possibly multiple, nodes */
86 strncpy(onhost," on host ",10);
87 gmx_gethostname(onhost+9,256);
88 #else
89 /* We detect all relevant GPUs */
90 strncpy(onhost,"",1);
91 #endif
93 if (ngpu > 0)
95 sprint_gpus(stmp, gpu_info, TRUE);
96 md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
97 ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
99 else
101 md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
105 static void print_gpu_use_stats(FILE *fplog,
106 const gmx_gpu_info_t *gpu_info,
107 const t_commrec *cr)
109 char sbuf[STRLEN], stmp[STRLEN];
110 int i, ngpu, ngpu_all;
112 ngpu = gpu_info->ncuda_dev_use;
113 ngpu_all = gpu_info->ncuda_dev;
115 /* Issue note if GPUs are available but not used */
116 if (ngpu_all > 0 && ngpu < 1)
118 sprintf(sbuf,
119 "%d compatible GPU%s detected in the system, but none will be used.\n"
120 "Consider trying GPU acceleration with the Verlet scheme!",
121 ngpu_all, (ngpu_all > 1) ? "s" : "");
123 else
125 sprintf(sbuf, "%d GPU%s %sselected to be used for this run: ",
126 ngpu, (ngpu > 1) ? "s" : "",
127 gpu_info->bUserSet ? "user-" : "auto-");
128 for (i = 0; i < ngpu; i++)
130 sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, i));
131 if (i < ngpu - 1)
133 strcat(stmp, ", ");
135 strcat(sbuf, stmp);
138 md_print_info(cr, fplog, "%s\n\n", sbuf);
141 /* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
142 * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
143 static void parse_gpu_id_plain_string(const char *idstr, int *nid, int *idlist)
145 int i;
146 size_t len_idstr;
148 len_idstr = strlen(idstr);
150 if (len_idstr > max_gpu_ids_user)
152 gmx_fatal(FARGS,"%d GPU IDs provided, but only at most %d are supported",
153 len_idstr, max_gpu_ids_user);
156 *nid = len_idstr;
158 for (i = 0; i < *nid; i++)
160 if (idstr[i] < '0' || idstr[i] > '9')
162 gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n", idstr[i]);
164 idlist[i] = idstr[i] - '0';
/* Placeholder for parsing a comma-separated GPU ID string.
 * Not implemented: unconditionally reports an inconsistency through
 * gmx_incons; none of the parameters is read or written. */
static void parse_gpu_id_csv_string(const char *idstr, int *nid, int *idlist)
{
    /* XXX implement csv format to support more than 10 different GPUs in a box. */
    gmx_incons("Not implemented yet");
}
174 void gmx_check_hw_runconf_consistency(FILE *fplog, gmx_hw_info_t *hwinfo,
175 const t_commrec *cr, int ntmpi_requested,
176 gmx_bool bUseGPU)
178 int npppn, ntmpi_pp, ngpu;
179 char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
180 char gpu_plural[2];
181 gmx_bool bGPUBin, btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
183 assert(hwinfo);
184 assert(cr);
186 btMPI = bMPI = FALSE;
187 bNthreadsAuto = FALSE;
188 #if defined(GMX_THREAD_MPI)
189 btMPI = TRUE;
190 bNthreadsAuto = (ntmpi_requested < 1);
191 #elif defined(GMX_LIB_MPI)
192 bMPI = TRUE;
193 #endif
195 #ifdef GMX_GPU
196 bGPUBin = TRUE;
197 #else
198 bGPUBin = FALSE;
199 #endif
201 /* GPU emulation detection is done later, but we need here as well
202 * -- uncool, but there's no elegant workaround */
203 bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL);
204 bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
206 if (SIMMASTER(cr))
208 /* check the acceleration mdrun is compiled with against hardware capabilities */
209 /* TODO: Here we assume homogeneous hardware which is not necessarily the case!
210 * Might not hurt to add an extra check over MPI. */
211 gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog);
214 /* Below we only do consistency checks for PP and GPUs,
215 * this is irrelevant for PME only nodes, so in that case we return here.
217 if (!(cr->duty & DUTY_PP))
219 return;
222 /* Need to ensure that we have enough GPUs:
223 * - need one GPU per PP node
224 * - no GPU oversubscription with tMPI
225 * => keep on the GPU support, otherwise turn off (or bail if forced)
226 * */
227 /* number of PP processes per node */
228 npppn = cr->nnodes_pp_intra;
230 pernode[0] = '\0';
231 th_or_proc_plural[0] = '\0';
232 if (btMPI)
234 sprintf(th_or_proc, "thread-MPI thread");
235 if (npppn > 1)
237 sprintf(th_or_proc_plural, "s");
240 else if (bMPI)
242 sprintf(th_or_proc, "MPI process");
243 if (npppn > 1)
245 sprintf(th_or_proc_plural, "es");
247 sprintf(pernode, " per node");
249 else
251 /* neither MPI nor tMPI */
252 sprintf(th_or_proc, "process");
255 if (bGPUBin)
257 print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
260 if (bUseGPU && hwinfo->bCanUseGPU && !bEmulateGPU)
262 ngpu = hwinfo->gpu_info.ncuda_dev_use;
263 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
265 /* number of tMPI threads atuo-adjusted */
266 if (btMPI && bNthreadsAuto && SIMMASTER(cr))
268 if (npppn < ngpu)
270 if (hwinfo->gpu_info.bUserSet)
272 /* The user manually provided more GPUs than threads we could
273 * automatically start. */
274 gmx_fatal(FARGS,
275 "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
276 "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
277 ngpu, gpu_plural, npppn, th_or_proc_plural,
278 ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
280 else
282 /* There are more GPUs than tMPI threads; we have to limit the number GPUs used. */
283 md_print_warn(cr,fplog,
284 "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
285 " %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
286 ngpu, gpu_plural, npppn, th_or_proc_plural,
287 ShortProgram(), npppn, npppn > 1 ? "s" : "",
288 bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
290 if (cr->nodeid_intra == 0)
292 limit_num_gpus_used(hwinfo, npppn);
293 ngpu = hwinfo->gpu_info.ncuda_dev_use;
294 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
300 if (ngpu != npppn)
302 if (hwinfo->gpu_info.bUserSet)
304 gmx_fatal(FARGS,
305 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
306 "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
307 th_or_proc, btMPI ? "s" : "es" , pernode,
308 ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
310 else
312 if (ngpu > npppn)
314 md_print_warn(cr,fplog,
315 "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
316 " PP %s%s%s than GPU%s available.\n"
317 " Each PP %s can only use one GPU, so only %d GPU%s%s will be used.",
318 ShortProgram(),
319 th_or_proc, th_or_proc_plural, pernode, gpu_plural,
320 th_or_proc, npppn, gpu_plural, pernode);
322 if (bMPI || (btMPI && cr->nodeid_intra == 0))
324 limit_num_gpus_used(hwinfo, npppn);
325 ngpu = hwinfo->gpu_info.ncuda_dev_use;
326 sprintf(gpu_plural, "%s", (ngpu > 1) ? "s" : "");
329 else
331 /* Avoid duplicate error messages.
332 * Unfortunately we can only do this at the physical node
333 * level, since the hardware setup and MPI process count
334 * might be differ over physical nodes.
336 if (cr->nodeid_intra == 0)
338 gmx_fatal(FARGS,
339 "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
340 "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
341 th_or_proc, btMPI ? "s" : "es" , pernode,
342 ShortProgram(), npppn, th_or_proc, th_or_proc_plural, pernode, ngpu, gpu_plural);
344 #ifdef GMX_MPI
345 else
347 /* Avoid other ranks to continue after inconsistency */
348 MPI_Barrier(cr->mpi_comm_mygroup);
350 #endif
355 if (hwinfo->gpu_info.bUserSet && (cr->nodeid_intra == 0))
357 int i, j, same_count;
358 gmx_bool bSomeSame, bAllDifferent;
360 same_count = 0;
361 bSomeSame = FALSE;
362 bAllDifferent = TRUE;
364 for (i = 0; i < ngpu - 1; i++)
366 for (j = i + 1; j < ngpu; j++)
368 bSomeSame |= hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
369 bAllDifferent &= hwinfo->gpu_info.cuda_dev_use[i] != hwinfo->gpu_info.cuda_dev_use[j];
370 same_count += hwinfo->gpu_info.cuda_dev_use[i] == hwinfo->gpu_info.cuda_dev_use[j];
374 if (btMPI && !bAllDifferent)
376 gmx_fatal(FARGS,
377 "Invalid GPU assignment: can't share a GPU among multiple thread-MPI threads.\n"
378 "Use MPI if you are sure that you want to assign GPU to multiple threads.");
381 if (bSomeSame)
383 md_print_warn(cr,fplog,
384 "NOTE: Potentially sub-optimal launch configuration: you assigned %s to\n"
385 " multiple %s%s; this should be avoided as it generally\n"
386 " causes performance loss.",
387 same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
390 print_gpu_use_stats(fplog, &hwinfo->gpu_info, cr);
394 /* Return the number of hardware threads supported by the current CPU.
395 * We assume that this is equal with the number of CPUs reported to be
396 * online by the OS at the time of the call.
398 static int get_nthreads_hw_avail(FILE *fplog, const t_commrec *cr)
400 int ret = 0;
402 #if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
403 /* Windows */
404 SYSTEM_INFO sysinfo;
405 GetSystemInfo( &sysinfo );
406 ret = sysinfo.dwNumberOfProcessors;
407 #elif defined HAVE_SYSCONF
408 /* We are probably on Unix.
409 * Now check if we have the argument to use before executing the call
411 #if defined(_SC_NPROCESSORS_ONLN)
412 ret = sysconf(_SC_NPROCESSORS_ONLN);
413 #elif defined(_SC_NPROC_ONLN)
414 ret = sysconf(_SC_NPROC_ONLN);
415 #elif defined(_SC_NPROCESSORS_CONF)
416 ret = sysconf(_SC_NPROCESSORS_CONF);
417 #elif defined(_SC_NPROC_CONF)
418 ret = sysconf(_SC_NPROC_CONF);
419 #endif /* End of check for sysconf argument values */
421 #else
422 /* Neither windows nor Unix. No fscking idea how many CPUs we have! */
423 ret = -1;
424 #endif
426 if (debug)
428 fprintf(debug, "Detected %d processors, will use this as the number "
429 "of supported hardware threads.\n", ret);
432 #ifdef GMX_OMPENMP
433 if (ret != gmx_omp_get_num_procs())
435 md_print_warn(cr, fplog,
436 "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
437 "Consider setting the launch configuration manually!",
438 ret, gmx_omp_get_num_procs());
440 #endif
442 return ret;
445 void gmx_detect_hardware(FILE *fplog, gmx_hw_info_t *hwinfo,
446 const t_commrec *cr,
447 gmx_bool bForceUseGPU, gmx_bool bTryUseGPU,
448 const char *gpu_id)
450 int i;
451 const char *env;
452 char sbuf[STRLEN], stmp[STRLEN];
453 gmx_hw_info_t *hw;
454 gmx_gpu_info_t gpuinfo_auto, gpuinfo_user;
455 gmx_bool bGPUBin;
457 assert(hwinfo);
459 /* detect CPUID info; no fuss, we don't detect system-wide
460 * -- sloppy, but that's it for now */
461 if (gmx_cpuid_init(&hwinfo->cpuid_info) != 0)
463 gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
466 /* detect number of hardware threads */
467 hwinfo->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
469 /* detect GPUs */
470 hwinfo->gpu_info.ncuda_dev_use = 0;
471 hwinfo->gpu_info.cuda_dev_use = NULL;
472 hwinfo->gpu_info.ncuda_dev = 0;
473 hwinfo->gpu_info.cuda_dev = NULL;
475 #ifdef GMX_GPU
476 bGPUBin = TRUE;
477 #else
478 bGPUBin = FALSE;
479 #endif
481 /* Bail if binary is not compiled with GPU on */
482 if (bForceUseGPU && !bGPUBin)
484 gmx_fatal_collective(FARGS, cr, NULL, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
487 /* run the detection if the binary was compiled with GPU support */
488 if (bGPUBin && getenv("GMX_DISABLE_GPU_DETECTION")==NULL)
490 detect_cuda_gpus(&hwinfo->gpu_info);
493 if (bForceUseGPU || bTryUseGPU)
495 env = getenv("GMX_GPU_ID");
496 if (env != NULL && gpu_id != NULL)
498 gmx_fatal(FARGS,"GMX_GPU_ID and -gpu_id can not be used at the same time");
500 if (env == NULL)
502 env = gpu_id;
505 /* parse GPU IDs if the user passed any */
506 if (env != NULL)
508 int *gpuid, *checkres;
509 int nid, res;
511 snew(gpuid, max_gpu_ids_user);
512 snew(checkres, max_gpu_ids_user);
514 parse_gpu_id_plain_string(env, &nid, gpuid);
516 if (nid == 0)
518 gmx_fatal(FARGS, "Empty GPU ID string passed\n");
521 res = check_select_cuda_gpus(checkres, &hwinfo->gpu_info, gpuid, nid);
523 if (!res)
525 print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
527 sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
528 for (i = 0; i < nid; i++)
530 if (checkres[i] != egpuCompatible)
532 sprintf(stmp, " GPU #%d: %s\n",
533 gpuid[i], gpu_detect_res_str[checkres[i]]);
534 strcat(sbuf, stmp);
537 gmx_fatal(FARGS, "%s", sbuf);
540 hwinfo->gpu_info.bUserSet = TRUE;
542 sfree(gpuid);
543 sfree(checkres);
545 else
547 pick_compatible_gpus(&hwinfo->gpu_info);
548 hwinfo->gpu_info.bUserSet = FALSE;
551 /* decide whether we can use GPU */
552 hwinfo->bCanUseGPU = (hwinfo->gpu_info.ncuda_dev_use > 0);
553 if (!hwinfo->bCanUseGPU && bForceUseGPU)
555 gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
560 void limit_num_gpus_used(gmx_hw_info_t *hwinfo, int count)
562 int ndev_use;
564 assert(hwinfo);
566 ndev_use = hwinfo->gpu_info.ncuda_dev_use;
568 if (count > ndev_use)
570 /* won't increase the # of GPUs */
571 return;
574 if (count < 1)
576 char sbuf[STRLEN];
577 sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
578 ndev_use, count);
579 gmx_incons(sbuf);
582 /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
583 hwinfo->gpu_info.ncuda_dev_use = count;
586 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
588 if (hwinfo)
590 gmx_cpuid_done(hwinfo->cpuid_info);
591 free_gpu_info(&hwinfo->gpu_info);
592 sfree(hwinfo);