2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define functions for detection and initialization for CUDA devices.
38 * \author Szilard Pall <pall.szilard@gmail.com>
43 #include "gpu_utils.h"
51 #include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
52 #include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
53 #include "gromacs/hardware/gpu_hw_info.h"
54 #include "gromacs/utility/basedefinitions.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/smalloc.h"
60 #define HAVE_NVML_APPLICATION_CLOCKS (NVML_API_VERSION >= 6)
62 #define HAVE_NVML_APPLICATION_CLOCKS 0
63 #endif /* HAVE_NVML */
/* NOTE(review): the two defines above are clearly the two arms of a
 * conditional (the '#endif' refers to HAVE_NVML); the '#if HAVE_NVML' and
 * '#else' lines appear to be missing from this copy — restore from VCS. */
65 #if defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS
66 /*! Check for NVML error on the return status of a NVML API call. */
/* NOTE(review): this macro body looks truncated — the 'do { ... } while (0)'
 * wrapper and the braces around the warning are missing in this view. */
67 # define HANDLE_NVML_RET_ERR(status, msg) \
69 if (status != NVML_SUCCESS) \
71 gmx_warning("%s: %s\n", msg, nvmlErrorString(status)); \
74 #else /* defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS */
75 # define HANDLE_NVML_RET_ERR(status, msg) do { } while (0)
76 #endif /* defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS */
/* Compile-time flag telling whether NVML is new enough to manage
 * application clocks; used for user-facing diagnostics below. */
78 #if HAVE_NVML_APPLICATION_CLOCKS
79 static const gmx_bool bCompiledWithApplicationClockSupport = true;
/* NOTE(review): an '#else' line is missing here — the 'true' and 'false'
 * definitions of the same constant cannot both be active. */
81 static const gmx_bool gmx_unused bCompiledWithApplicationClockSupport = false;
/* NOTE(review): the '/ *!' open and '* /' close of the doxygen comment for
 * cuda_max_device_count are missing around the next two lines. */
85 * Max number of devices supported by CUDA (for consistency checking).
87 * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
89 static int cuda_max_device_count = 32;
/** Dummy kernel used for sanity checking.
 *
 * Intentionally empty: launching it and synchronizing is enough to verify
 * that kernels can be launched and complete on the selected device.
 */
__global__ void k_dummy_test()
{
}
/*!
 * \brief Runs GPU sanity checks.
 *
 * Runs a series of checks to determine that the given GPU and underlying CUDA
 * driver/runtime function properly.
 * Returns properties of a device with given ID or of the one that has
 * already been initialized earlier in the case of dev_id == -1.
 *
 * \param[in]  dev_id   the device ID of the GPU or -1 if the device has already been initialized
 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
 * \returns 0 if the device looks OK, -1 otherwise
 *
 * TODO: introduce error codes and handle errors more smoothly.
 */
static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
{
    cudaError_t cu_err;
    int         dev_count, id;

    cu_err = cudaGetDeviceCount(&dev_count);
    if (cu_err != cudaSuccess)
    {
        fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
                cudaGetErrorString(cu_err));
        return -1;
    }

    /* no CUDA compatible device at all */
    if (dev_count == 0)
    {
        return -1;
    }

    /* things might go horribly wrong if cudart is not compatible with the driver */
    if (dev_count < 0 || dev_count > cuda_max_device_count)
    {
        return -1;
    }

    if (dev_id == -1) /* device already selected let's not destroy the context */
    {
        cu_err = cudaGetDevice(&id);
        if (cu_err != cudaSuccess)
        {
            fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
                    cudaGetErrorString(cu_err));
            return -1;
        }
    }
    else
    {
        id = dev_id;
        if (id > dev_count - 1) /* pfff there's no such device */
        {
            fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
                    dev_id, dev_count);
            return -1;
        }
    }

    memset(dev_prop, 0, sizeof(cudaDeviceProp));
    cu_err = cudaGetDeviceProperties(dev_prop, id);
    if (cu_err != cudaSuccess)
    {
        fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
                cudaGetErrorString(cu_err));
        return -1;
    }

    /* both major & minor is 9999 if no CUDA capable devices are present */
    if (dev_prop->major == 9999 && dev_prop->minor == 9999)
    {
        return -1;
    }
    /* we don't care about emulation mode */
    if (dev_prop->major == 0)
    {
        return -1;
    }

    /* Only switch devices (creating a context) when the caller asked for a
     * specific device; with dev_id == -1 the context already exists. */
    if (dev_id != -1)
    {
        cu_err = cudaSetDevice(id);
        if (cu_err != cudaSuccess)
        {
            fprintf(stderr, "Error %d while switching to device #%d: %s\n",
                    cu_err, id, cudaGetErrorString(cu_err));
            return -1;
        }
    }

    /* try to execute a dummy kernel */
    k_dummy_test<<< 1, 512>>> ();
    if (cudaThreadSynchronize() != cudaSuccess)
    {
        /* the sync also surfaces asynchronous launch/execution errors */
        return -1;
    }

    /* destroy context if we created one */
    if (dev_id != -1)
    {
        cu_err = cudaDeviceReset();
        CU_RET_ERR(cu_err, "cudaDeviceReset failed");
    }

    return 0;
}
/* TODO: We should actually be using md_print_warn in md_logging.c,
 * but we can't include mpi.h in CUDA code.
 */
/*! \brief Prints a formatted informational message.
 *
 * Prints to stderr and to \p fplog; with a NULL \p fplog nothing is printed
 * (fplog is normally only set on the master node, so stderr output is also
 * limited to the master that way).
 */
static void md_print_info(FILE *fplog,
                          const char *fmt, ...)
{
    va_list ap;

    if (fplog != NULL)
    {
        /* We should only print to stderr on the master node,
         * in most cases fplog is only set on the master node, so this works.
         */
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);

        va_start(ap, fmt);
        vfprintf(fplog, fmt, ap);
        va_end(ap);
    }
}
/* TODO: We should actually be using md_print_warn in md_logging.c,
 * but we can't include mpi.h in CUDA code.
 * This is replicated from nbnxn_cuda_data_mgmt.cu.
 */
/*! \brief Prints a formatted warning framed by blank lines.
 *
 * Prints "\n<message>\n" to stderr and to \p fplog; with a NULL \p fplog
 * nothing is printed (fplog is normally only set on the master node).
 */
static void md_print_warn(FILE *fplog,
                          const char *fmt, ...)
{
    va_list ap;

    if (fplog != NULL)
    {
        /* We should only print to stderr on the master node,
         * in most cases fplog is only set on the master node, so this works.
         */
        va_start(ap, fmt);
        fprintf(stderr, "\n");
        vfprintf(stderr, fmt, ap);
        fprintf(stderr, "\n");
        va_end(ap);

        va_start(ap, fmt);
        fprintf(fplog, "\n");
        vfprintf(fplog, fmt, ap);
        fprintf(fplog, "\n");
        va_end(ap);
    }
}
258 #if HAVE_NVML_APPLICATION_CLOCKS
259 /*! \brief Determines and adds the NVML device ID to the passed \cuda_dev.
261 * Determines and adds the NVML device ID to the passed \cuda_dev. This is done by
262 * matching PCI-E information from \cuda_dev with the available NVML devices.
264 * \param[in,out] cuda_dev CUDA device information to enrich with NVML device info
265 * \returns true if \cuda_dev could be enriched with matching NVML device information.
267 static bool addNVMLDeviceId(gmx_device_info_t* cuda_dev)
/* NOTE(review): this copy of the function is truncated — braces and the
 * loop-body 'continue'/'break' statements are missing. The intended logic,
 * as far as it can be read here: enumerate all NVML devices and record the
 * handle of the first whose PCI domain/bus/device triple matches cuda_dev. */
269 nvmlDevice_t nvml_device_id;
270 unsigned int nvml_device_count = 0;
271 nvmlReturn_t nvml_stat = nvmlDeviceGetCount ( &nvml_device_count );
272 cuda_dev->nvml_initialized = false;
273 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetCount failed" );
/* Walk all NVML-visible devices while no NVML call has failed. */
274 for (unsigned int nvml_device_idx = 0; nvml_stat == NVML_SUCCESS && nvml_device_idx < nvml_device_count; ++nvml_device_idx)
276 nvml_stat = nvmlDeviceGetHandleByIndex ( nvml_device_idx, &nvml_device_id );
277 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetHandleByIndex failed" );
278 if (nvml_stat != NVML_SUCCESS)
283 nvmlPciInfo_t nvml_pci_info;
284 nvml_stat = nvmlDeviceGetPciInfo ( nvml_device_id, &nvml_pci_info );
285 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetPciInfo failed" );
286 if (nvml_stat != NVML_SUCCESS)
/* Match the CUDA device's PCI bus/device/domain against this NVML device. */
290 if (static_cast<unsigned int>(cuda_dev->prop.pciBusID) == nvml_pci_info.bus &&
291 static_cast<unsigned int>(cuda_dev->prop.pciDeviceID) == nvml_pci_info.device &&
292 static_cast<unsigned int>(cuda_dev->prop.pciDomainID) == nvml_pci_info.domain)
294 cuda_dev->nvml_initialized = true;
295 cuda_dev->nvml_device_id = nvml_device_id;
299 return cuda_dev->nvml_initialized;
301 #endif /* HAVE_NVML_APPLICATION_CLOCKS */
303 /*! \brief Tries to set application clocks for the GPU with the given index.
305 * The variable \gpuid is the index of the GPU in the gpu_info.cuda_dev array
306 * to handle the application clocks for. Application clocks are set to the
307 * max supported value to increase performance if application clock permissions
308 * allow this. For future GPU architectures a more sophisticated scheme might be
311 * \param[out] fplog log file to write to
312 * \param[in] gpuid index of the GPU to set application clocks for
313 * \param[in] gpu_info GPU info of all detected devices in the system.
314 * \returns true if no error occurs during application clocks handling.
316 static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unused gpuid, const gmx_gpu_info_t gmx_unused *gpu_info)
/* NOTE(review): this copy is truncated — braces, early 'return' statements,
 * several declarations (e.g. 'int cuda_driver', 'char *env') and the
 * '#if defined(HAVE_NVML)' preprocessor arms separating the fallback
 * warnings below are missing; compare against version control before
 * editing any of the logic. */
318 const cudaDeviceProp *prop = &gpu_info->gpu_dev[gpuid].prop;
/* Compute capability folded into a two-digit number, e.g. 3.5 -> 35. */
319 int cuda_version_number = prop->major * 10 + prop->minor;
/* Only Tesla (cc >= 3.5) and Quadro (cc >= 5.2) boards are treated as
 * having settable application clocks here. */
320 gmx_bool bGpuCanUseApplicationClocks =
321 ((0 == gmx_wcmatch("*Tesla*", prop->name) && cuda_version_number >= 35 ) ||
322 (0 == gmx_wcmatch("*Quadro*", prop->name) && cuda_version_number >= 52 ))
323 if (!bGpuCanUseApplicationClocks)
329 int cuda_runtime = 0;
330 cudaDriverGetVersion(&cuda_driver);
331 cudaRuntimeGetVersion(&cuda_runtime);
332 md_print_warn( fplog, "NOTE: GROMACS was configured without NVML support hence it can not exploit\n"
333 " application clocks of the detected %s GPU to improve performance.\n"
334 " Recompile with the NVML library (compatible with the driver used) or set application clocks manually.\n",
338 if (!bCompiledWithApplicationClockSupport)
341 int cuda_runtime = 0;
342 cudaDriverGetVersion(&cuda_driver);
343 cudaRuntimeGetVersion(&cuda_runtime);
344 md_print_warn( fplog, "NOTE: GROMACS was compiled with an old NVML library which does not support\n"
345 " managing application clocks of the detected %s GPU to improve performance.\n"
346 " If your GPU supports application clocks, upgrade NVML (and driver) and recompile or set the clocks manually.\n",
351 /* We've compiled with NVML application clocks support, and have a GPU that can use it */
352 nvmlReturn_t nvml_stat = NVML_SUCCESS;
354 //TODO: GMX_GPU_APPLICATION_CLOCKS is currently only used to enable/disable setting of application clocks
355 // this variable can be later used to give a user more fine grained control.
356 env = getenv("GMX_GPU_APPLICATION_CLOCKS");
/* A value of "0"/"OFF"/"DISABLE" opts out of touching application clocks. */
357 if (env != NULL && ( strcmp( env, "0") == 0 ||
358 gmx_strcasecmp( env, "OFF") == 0 ||
359 gmx_strcasecmp( env, "DISABLE") == 0 ))
363 nvml_stat = nvmlInit();
364 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlInit failed." );
365 if (nvml_stat != NVML_SUCCESS)
369 if (!addNVMLDeviceId( &(gpu_info->gpu_dev[gpuid])))
373 //get current application clocks setting
374 unsigned int app_sm_clock = 0;
375 unsigned int app_mem_clock = 0;
376 nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &app_sm_clock );
377 if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
381 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
382 nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &app_mem_clock );
383 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
384 //get max application clocks
385 unsigned int max_sm_clock = 0;
386 unsigned int max_mem_clock = 0;
387 nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &max_sm_clock );
388 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
389 nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock );
390 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
/* Assume restricted until the API-restriction query says otherwise. */
392 gpu_info->gpu_dev[gpuid].nvml_is_restricted = NVML_FEATURE_ENABLED;
393 gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = false;
395 nvml_stat = nvmlDeviceGetAPIRestriction ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(gpu_info->gpu_dev[gpuid].nvml_is_restricted) );
396 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetAPIRestriction failed" );
398 /* Note: Distinguishing between different types of GPUs here might be necessary in the future,
399 e.g. if max application clocks should not be used for certain GPUs. */
400 if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock && gpu_info->gpu_dev[gpuid].nvml_is_restricted == NVML_FEATURE_DISABLED)
402 md_print_info( fplog, "Changing GPU application clocks for %s to (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, max_mem_clock, max_sm_clock);
403 nvml_stat = nvmlDeviceSetApplicationsClocks ( gpu_info->gpu_dev[gpuid].nvml_device_id, max_mem_clock, max_sm_clock );
/* NOTE(review): the message below is wrong — it reports the *Set* call,
 * so it should read "nvmlDeviceSetApplicationsClocks failed". */
404 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
405 gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = true;
407 else if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock)
409 md_print_warn( fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock, max_mem_clock, max_sm_clock);
411 else if (nvml_stat == NVML_SUCCESS && app_sm_clock == max_sm_clock)
413 //TODO: This should probably be integrated into the GPU Properties table.
414 md_print_info( fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock);
418 md_print_warn( fplog, "Can not change GPU application clocks to optimal values due to NVML error (%d): %s.\n", nvml_stat, nvmlErrorString(nvml_stat));
420 return (nvml_stat == NVML_SUCCESS);
421 #endif /* HAVE_NVML */
424 /*! \brief Resets application clocks if changed and cleans up NVML for the passed \gpu_dev.
426 * \param[in] gpu_dev CUDA device information
428 static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused * cuda_dev)
/* NOTE(review): truncated copy — braces and the leading conditions of the
 * 'if' whose two surviving clauses appear below (nvml_is_restricted /
 * nvml_ap_clocks_changed) are missing; restore from version control. */
430 #if !HAVE_NVML_APPLICATION_CLOCKS
431 GMX_UNUSED_VALUE(cuda_dev);
433 #else /* HAVE_NVML_APPLICATION_CLOCKS */
434 nvmlReturn_t nvml_stat = NVML_SUCCESS;
/* Only reset clocks that we were permitted to change and actually changed. */
436 cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED &&
437 cuda_dev->nvml_ap_clocks_changed)
439 nvml_stat = nvmlDeviceResetApplicationsClocks( cuda_dev->nvml_device_id );
440 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceResetApplicationsClocks failed" );
442 nvml_stat = nvmlShutdown();
443 HANDLE_NVML_RET_ERR( nvml_stat, "nvmlShutdown failed" );
444 return (nvml_stat == NVML_SUCCESS);
445 #endif /* HAVE_NVML_APPLICATION_CLOCKS */
/* Selects and initializes the mygpu-th in-use GPU (cudaSetDevice), copies
 * the resulting CUDA error string into result_str, and on success attempts
 * to set application clocks via init_gpu_application_clocks() (NVML errors
 * are treated as non-critical).  Returns TRUE iff cudaSetDevice succeeded.
 * NOTE(review): truncated copy — the opening brace, local declarations
 * (stat/sbuf/gpuid), asserts and the braces/gmx_fatal of the range-check
 * error path are missing from this view. */
448 gmx_bool init_gpu(FILE gmx_unused *fplog, int mygpu, char *result_str,
449 const struct gmx_gpu_info_t *gpu_info,
450 const struct gmx_gpu_opt_t *gpu_opt)
459 if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
461 sprintf(sbuf, "Trying to initialize an inexistent GPU: "
462 "there are %d %s-selected GPU(s), but #%d was requested.",
463 gpu_opt->n_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
467 gpuid = gpu_info->gpu_dev[gpu_opt->dev_use[mygpu]].id;
469 stat = cudaSetDevice(gpuid);
470 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
474 fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->gpu_dev[gpuid].prop.name);
477 //Ignoring return value as NVML errors should be treated not critical.
478 if (stat == cudaSuccess)
480 init_gpu_application_clocks(fplog, gpuid, gpu_info);
482 return (stat == cudaSuccess);
/* Cleans up the active CUDA device: resets application clocks (if they were
 * changed) and destroys the context via cudaDeviceReset(); the CUDA error
 * string is copied into result_str.  Returns TRUE only if both the reset of
 * the clocks and the device reset succeeded.
 * NOTE(review): truncated copy — the closing ')' of the parameter list, the
 * opening brace, local declarations (stat/gpuid) and several guards/braces
 * are missing from this view. */
485 gmx_bool free_cuda_gpu(
486 int gmx_unused mygpu, char *result_str,
487 const gmx_gpu_info_t gmx_unused *gpu_info,
488 const gmx_gpu_opt_t gmx_unused *gpu_opt
492 gmx_bool reset_gpu_application_clocks_status = true;
500 stat = cudaGetDevice(&gpuid);
501 CU_RET_ERR(stat, "cudaGetDevice failed");
502 fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
/* NOTE(review): gpuid is recomputed from gpu_opt here; the -1 fallback
 * presumably skips the clock reset when no selection exists — confirm
 * against the original guards before relying on this. */
505 gpuid = gpu_opt ? gpu_opt->dev_use[mygpu] : -1;
508 reset_gpu_application_clocks_status = reset_gpu_application_clocks( &(gpu_info->gpu_dev[gpuid]) );
511 stat = cudaDeviceReset();
512 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
513 return (stat == cudaSuccess) && reset_gpu_application_clocks_status;
/*! \brief Returns true if the gpu characterized by the device properties is
 * supported by the native gpu acceleration.
 *
 * \param[in] dev_prop the CUDA device properties of the gpus to test.
 * \returns true if the GPU properties passed indicate a compatible
 *          GPU, otherwise false.
 */
static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
{
    /* Native GPU acceleration requires compute capability >= 2.0. */
    return (dev_prop->major >= 2);
}
/*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
 *
 * \param[in] stat GPU status.
 * \returns true if the provided status is egpuCompatible, otherwise false.
 */
static bool is_compatible_gpu(int stat)
{
    return (stat == egpuCompatible);
}
/*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
 *
 * Returns a status value which indicates compatibility or one of the following
 * errors: incompatibility, nonexistence, or insanity (=unexpected behavior).
 * It also returns the respective device's properties in \dev_prop (if applicable).
 *
 * \param[in]  dev_id   the ID of the GPU to check.
 * \param[out] dev_prop the CUDA device properties of the device checked.
 * \returns the status of the requested device
 */
static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
{
    cudaError_t stat;
    int         ndev;

    stat = cudaGetDeviceCount(&ndev);
    if (stat != cudaSuccess)
    {
        return egpuInsane;
    }

    if (dev_id > ndev - 1)
    {
        return egpuNonexistent;
    }

    /* TODO: currently we do not make a distinction between the type of errors
     * that can appear during sanity checks. This needs to be improved, e.g if
     * the dummy test kernel fails to execute with a "device busy message" we
     * should appropriately report that the device is busy instead of insane.
     */
    if (do_sanity_checks(dev_id, dev_prop) == 0)
    {
        if (is_gmx_supported_gpu(dev_prop))
        {
            return egpuCompatible;
        }
        else
        {
            return egpuIncompatible;
        }
    }
    else
    {
        return egpuInsane;
    }
}
/* Detects CUDA devices: queries the device count, runs the sanity and
 * compatibility check on each device, stores the per-device status and
 * counts compatible devices into gpu_info.  On a cudaGetDeviceCount()
 * failure the CUDA error string is copied into err_str.
 * NOTE(review): truncated copy — opening brace, asserts, the devs
 * allocation, the retval/error path and the final return are missing from
 * this view; restore from version control before editing the logic. */
587 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
589 int i, ndev, checkres, retval;
592 gmx_device_info_t *devs;
597 gpu_info->n_dev_compatible = 0;
602 stat = cudaGetDeviceCount(&ndev);
603 if (stat != cudaSuccess)
607 /* cudaGetDeviceCount failed which means that there is something
608 * wrong with the machine: driver-runtime mismatch, all GPUs being
609 * busy in exclusive mode, or some other condition which should
610 * result in us issuing a warning and falling back to CPUs. */
612 s = cudaGetErrorString(stat);
613 strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
/* Classify every device and count the compatible ones. */
618 for (i = 0; i < ndev; i++)
620 checkres = is_gmx_supported_gpu_id(i, &prop);
624 devs[i].stat = checkres;
626 if (checkres == egpuCompatible)
628 gpu_info->n_dev_compatible++;
634 gpu_info->n_dev = ndev;
635 gpu_info->gpu_dev = devs;
/*! \brief Collects the indices of all compatible detected GPUs.
 *
 * Fills gpu_opt->dev_compatible with the indices (into gpu_info->gpu_dev)
 * of all devices whose status is egpuCompatible and sets
 * gpu_opt->n_dev_compatible accordingly.
 *
 * \param[in]  gpu_info information about the detected GPUs
 * \param[out] gpu_opt  output: number and list of compatible devices
 */
void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
                          gmx_gpu_opt_t        *gpu_opt)
{
    int  i, ncompat;
    int *compat;

    assert(gpu_info);
    /* gpu_dev/n_dev have to be either NULL/0 or not (NULL/0) */
    assert((gpu_info->n_dev != 0 ? 0 : 1) ^ (gpu_info->gpu_dev == NULL ? 0 : 1));

    snew(compat, gpu_info->n_dev);
    ncompat = 0;
    for (i = 0; i < gpu_info->n_dev; i++)
    {
        if (is_compatible_gpu(gpu_info->gpu_dev[i].stat))
        {
            ncompat++;
            compat[ncompat - 1] = i;
        }
    }

    gpu_opt->n_dev_compatible = ncompat;
    snew(gpu_opt->dev_compatible, ncompat);
    memcpy(gpu_opt->dev_compatible, compat, ncompat*sizeof(*compat));
    sfree(compat);
}
/*! \brief Checks the user-selected GPU IDs against the detected devices.
 *
 * For each entry of gpu_opt->dev_use a status is recorded in \p checkres:
 * egpuNonexistent for out-of-range IDs, otherwise the detection status of
 * the corresponding device.
 *
 * \param[out]    checkres per-selection status array (length gpu_opt->n_dev_use)
 * \param[in]     gpu_info information about the detected GPUs
 * \param[in,out] gpu_opt  the GPU selection to verify
 * \returns TRUE if every selected GPU is compatible
 */
gmx_bool check_selected_gpus(int                  *checkres,
                             const gmx_gpu_info_t *gpu_info,
                             gmx_gpu_opt_t        *gpu_opt)
{
    int      i, id;
    gmx_bool bAllOk;

    assert(checkres);
    assert(gpu_info);
    assert(gpu_opt->n_dev_use >= 0);

    if (gpu_opt->n_dev_use == 0)
    {
        /* Nothing selected, nothing to check: trivially OK. */
        return TRUE;
    }

    assert(gpu_opt->dev_use);

    /* we will assume that all GPUs requested are valid IDs,
       otherwise we'll bail anyways */

    bAllOk = TRUE;
    for (i = 0; i < gpu_opt->n_dev_use; i++)
    {
        id = gpu_opt->dev_use[i];

        /* devices are stored in increasing order of IDs in gpu_dev */
        gpu_opt->dev_use[i] = id;

        checkres[i] = (id >= gpu_info->n_dev) ?
            egpuNonexistent : gpu_info->gpu_dev[id].stat;

        bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
    }

    return bAllOk;
}
/*! \brief Frees the gpu_dev array of \p gpu_info; a NULL \p gpu_info is a no-op. */
void free_gpu_info(const gmx_gpu_info_t *gpu_info)
{
    if (gpu_info == NULL)
    {
        return;
    }

    sfree(gpu_info->gpu_dev);
}
/* Formats a one-line, human-readable description of GPU 'index' into s.
 * NOTE(review): truncated copy — braces, the declaration of the boolean
 * whose two comparisons survive below (stat == egpuCompatible ||
 * stat == egpuIncompatible) and the if/else selecting between the two
 * sprintf() calls are missing from this view. */
715 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
/* NOTE(review): 'index < 0 && index >= n_dev' is always false — '||' was
 * almost certainly intended for this bounds check. */
720 if (index < 0 && index >= gpu_info->n_dev)
725 gmx_device_info_t *dinfo = &gpu_info->gpu_dev[index];
728 dinfo->stat == egpuCompatible ||
729 dinfo->stat == egpuIncompatible;
/* Short form: used when full device properties are not applicable. */
733 sprintf(s, "#%d: %s, stat: %s",
735 gpu_detect_res_str[dinfo->stat]);
/* Long form: includes compute capability and ECC state. */
739 sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
740 dinfo->id, dinfo->prop.name,
741 dinfo->prop.major, dinfo->prop.minor,
742 dinfo->prop.ECCEnabled ? "yes" : " no",
743 gpu_detect_res_str[dinfo->stat]);
/*! \brief Returns the device ID of the idx-th GPU selected for use.
 *
 * \param[in] gpu_info information about the detected GPUs
 * \param[in] gpu_opt  GPU selection (dev_use mapping)
 * \param[in] idx      index into the in-use list; must be in [0, n_dev_use)
 * \returns the CUDA device ID of the selected GPU
 */
int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
                      const gmx_gpu_opt_t  *gpu_opt,
                      int                   idx)
{
    assert(gpu_info);
    assert(gpu_opt);
    assert(idx >= 0 && idx < gpu_opt->n_dev_use);

    return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].id;
}
/*! \brief Returns the device ID of the CUDA GPU currently in use
 * (as reported by cudaGetDevice). */
int get_current_cuda_gpu_device_id(void)
{
    int gpuid;
    CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");

    return gpuid;
}
/*! \brief Returns the size of the gmx_device_info_t struct so callers can
 * allocate it without seeing its (CUDA-specific) definition. */
size_t sizeof_gpu_dev_info(void)
{
    return sizeof(gmx_device_info_t);
}
771 void gpu_set_host_malloc_and_free(bool bUseGpuKernels,
772 gmx_host_alloc_t **nb_alloc,
773 gmx_host_free_t **nb_free)
777 *nb_alloc = &pmalloc;