Simplify reporting from init_gpu
[gromacs.git] / src / gromacs / gpu_utils / gpu_utils_ocl.cpp
blob2b245395d80bc678699e3c280e51c65dfe24a304
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
/*! \internal \file
 *  \brief Define functions for detection and initialization for OpenCL devices.
 *
 *  \author Anca Hamuraru <anca@streamcomputing.eu>
 *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
 *  \author Teemu Virolainen <teemu@streamcomputing.eu>
 */
43 #include "gmxpre.h"
45 #include <assert.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #ifdef __APPLE__
50 # include <sys/sysctl.h>
51 #endif
53 #include <memory.h>
55 #include "gromacs/gpu_utils/gpu_utils.h"
56 #include "gromacs/gpu_utils/ocl_compiler.h"
57 #include "gromacs/gpu_utils/oclutils.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdtypes/md_enums.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/smalloc.h"
/*! \brief Helper macro for error handling.
 *
 * Evaluates the OpenCL call \p func exactly once. When the call does not
 * return CL_SUCCESS, the text "OpenCL error <code>" is written into
 * \p err_str and \p retval is set to -1; otherwise \p retval is set to 0
 * and \p err_str is not touched.
 *
 * The expansion is a plain brace block, so existing call sites that omit
 * the trailing semicolon keep compiling.
 */
#define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
        cl_int opencl_ret = func; \
        if (CL_SUCCESS == opencl_ret) \
        { \
            retval = 0; \
        } \
        else \
        { \
            sprintf(err_str, "OpenCL error %d", opencl_ret); \
            retval = -1; \
        } \
}
/*! \brief Return true if executing on compatible OS for AMD OpenCL.
 *
 * This is assumed to be true for OS X version of at least 10.10.4 and
 * all other OS flavors.
 *
 * Uses the BSD sysctl() interfaces to extract the kernel version.
 * If the kernel release string cannot be queried, or does not have the
 * form "<major>.<minor>...", the OS is conservatively reported as
 * incompatible instead of parsing garbage.
 *
 * \return true if version is 14.4 or later (= OS X version 10.10.4),
 *         or OS is not Darwin.
 */
static bool
runningOnCompatibleOSForAmd()
{
#ifdef __APPLE__
    int    mib[2] = { CTL_KERN, KERN_OSRELEASE };
    char   kernelVersion[256];
    /* Leave room for the terminator that is written below. */
    size_t len = sizeof(kernelVersion) - 1;

    if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0) != 0)
    {
        /* Query failed: the buffer contents are indeterminate. */
        return false;
    }
    kernelVersion[len] = '\0';

    /* Parse "<major>.<minor>", rejecting strings without digits or a dot. */
    char *end   = NULL;
    long  major = strtol(kernelVersion, &end, 10);
    if (end == kernelVersion || *end != '.')
    {
        return false;
    }

    const char *minorStart = end + 1;
    long        minor      = strtol(minorStart, &end, 10);
    if (end == minorStart)
    {
        return false;
    }

    // Kernel 14.4 corresponds to OS X 10.10.4
    return (major > 14 || (major == 14 && minor >= 4));
#else
    return true;
#endif
}
110 /*! \brief Returns true if the gpu characterized by the device properties is
111 * supported by the native gpu acceleration.
112 * \returns true if the GPU properties passed indicate a compatible
113 * GPU, otherwise false.
115 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
117 if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL)
119 return egpuCompatible;
122 /* Only AMD and NVIDIA GPUs are supported for now */
123 switch (ocl_gpu_device->vendor_e)
125 case OCL_VENDOR_NVIDIA:
126 return egpuCompatible;
127 case OCL_VENDOR_AMD:
128 return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
129 default:
130 return egpuIncompatible;
135 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
137 * \param[in] vendor_name String with OpenCL vendor name.
138 * \returns ocl_vendor_id_t value for the input vendor_name
140 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
142 if (vendor_name)
144 if (strstr(vendor_name, "NVIDIA"))
146 return OCL_VENDOR_NVIDIA;
148 else
149 if (strstr(vendor_name, "AMD") ||
150 strstr(vendor_name, "Advanced Micro Devices"))
152 return OCL_VENDOR_AMD;
154 else
155 if (strstr(vendor_name, "Intel"))
157 return OCL_VENDOR_INTEL;
160 return OCL_VENDOR_UNKNOWN;
164 //! This function is documented in the header file
165 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
167 int retval;
168 cl_uint ocl_platform_count;
169 cl_platform_id *ocl_platform_ids;
170 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
172 retval = 0;
173 ocl_platform_ids = NULL;
175 if (getenv("GMX_OCL_FORCE_CPU") != NULL)
177 req_dev_type = CL_DEVICE_TYPE_CPU;
180 while (1)
182 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL, &ocl_platform_count), err_str, retval)
183 if (0 != retval)
185 break;
188 if (1 > ocl_platform_count)
190 break;
193 snew(ocl_platform_ids, ocl_platform_count);
195 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL), err_str, retval)
196 if (0 != retval)
198 break;
201 for (unsigned int i = 0; i < ocl_platform_count; i++)
203 cl_uint ocl_device_count;
205 /* If requesting req_dev_type devices fails, just go to the next platform */
206 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
208 continue;
211 if (1 <= ocl_device_count)
213 gpu_info->n_dev += ocl_device_count;
217 if (1 > gpu_info->n_dev)
219 break;
222 snew(gpu_info->gpu_dev, gpu_info->n_dev);
225 int device_index;
226 cl_device_id *ocl_device_ids;
228 snew(ocl_device_ids, gpu_info->n_dev);
229 device_index = 0;
231 for (unsigned int i = 0; i < ocl_platform_count; i++)
233 cl_uint ocl_device_count;
235 /* If requesting req_dev_type devices fails, just go to the next platform */
236 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
238 continue;
241 if (1 > ocl_device_count)
243 break;
246 for (unsigned int j = 0; j < ocl_device_count; j++)
248 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
249 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j];
251 gpu_info->gpu_dev[device_index].device_name[0] = 0;
252 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
254 gpu_info->gpu_dev[device_index].device_version[0] = 0;
255 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
257 gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
258 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
260 gpu_info->gpu_dev[device_index].compute_units = 0;
261 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
263 gpu_info->gpu_dev[device_index].adress_bits = 0;
264 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
266 gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
268 gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
270 if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
272 gpu_info->n_dev_compatible++;
275 device_index++;
279 gpu_info->n_dev = device_index;
281 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
282 // TODO: Sort devices based on performance.
283 if (0 < gpu_info->n_dev)
285 int last = -1;
286 for (int i = 0; i < gpu_info->n_dev; i++)
288 if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
290 last++;
292 if (last < i)
294 gmx_device_info_t ocl_gpu_info;
295 ocl_gpu_info = gpu_info->gpu_dev[i];
296 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
297 gpu_info->gpu_dev[last] = ocl_gpu_info;
302 /* if more than 1 device left to be sorted */
303 if ((gpu_info->n_dev - 1 - last) > 1)
305 for (int i = 0; i < gpu_info->n_dev; i++)
307 if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
309 last++;
311 if (last < i)
313 gmx_device_info_t ocl_gpu_info;
314 ocl_gpu_info = gpu_info->gpu_dev[i];
315 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
316 gpu_info->gpu_dev[last] = ocl_gpu_info;
323 sfree(ocl_device_ids);
326 break;
329 sfree(ocl_platform_ids);
331 return retval;
334 //! This function is documented in the header file
335 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
337 if (gpu_info == NULL)
339 return;
342 sfree(gpu_info->gpu_dev);
345 //! This function is documented in the header file
346 bool isGpuCompatible(const gmx_gpu_info_t *gpu_info,
347 int index)
349 assert(gpu_info);
351 return (index >= gpu_info->n_dev ?
352 false :
353 gpu_info->gpu_dev[index].stat == egpuCompatible);
356 //! This function is documented in the header file
357 const char *getGpuCompatibilityDescription(const gmx_gpu_info_t *gpu_info,
358 int index)
360 assert(gpu_info);
362 return (index >= gpu_info->n_dev ?
363 gpu_detect_res_str[egpuNonexistent] :
364 gpu_detect_res_str[gpu_info->gpu_dev[index].stat]);
367 //! This function is documented in the header file
368 void get_gpu_device_info_string(char gmx_unused *s, const gmx_gpu_info_t gmx_unused *gpu_info, int gmx_unused index)
370 assert(s);
371 assert(gpu_info);
373 if (index < 0 && index >= gpu_info->n_dev)
375 return;
378 gmx_device_info_t *dinfo = &gpu_info->gpu_dev[index];
380 bool bGpuExists =
381 dinfo->stat == egpuCompatible ||
382 dinfo->stat == egpuIncompatible;
384 if (!bGpuExists)
386 sprintf(s, "#%d: %s, stat: %s",
387 index, "N/A",
388 gpu_detect_res_str[dinfo->stat]);
390 else
392 sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
393 index, dinfo->device_name, dinfo->device_vendor,
394 dinfo->device_version,
395 gpu_detect_res_str[dinfo->stat]);
399 //! This function is documented in the header file
400 gmx_bool init_gpu(const gmx::MDLogger & /*mdlog*/,
401 int mygpu,
402 char *result_str,
403 const gmx_gpu_info_t gmx_unused *gpu_info,
404 const gmx_gpu_opt_t *gpu_opt
407 assert(result_str);
409 result_str[0] = 0;
411 if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
413 char sbuf[STRLEN];
414 sprintf(sbuf, "Trying to initialize an non-existent GPU: "
415 "there are %d selected GPU(s), but #%d was requested.",
416 gpu_opt->n_dev_use, mygpu);
417 gmx_incons(sbuf);
420 // If the device is NVIDIA, for safety reasons we disable the JIT
421 // caching as this is known to be broken at least until driver 364.19;
422 // the cache does not always get regenerated when the source code changes,
423 // e.g. if the path to the kernel sources remains the same
425 if (gpu_info->gpu_dev[mygpu].vendor_e == OCL_VENDOR_NVIDIA)
427 // Ignore return values, failing to set the variable does not mean
428 // that something will go wrong later.
429 #ifdef _MSC_VER
430 _putenv("CUDA_CACHE_DISABLE=1");
431 #else
432 // Don't override, maybe a dev is testing.
433 setenv("CUDA_CACHE_DISABLE", "1", 0);
434 #endif
437 return TRUE;
440 //! This function is documented in the header file
441 int get_gpu_device_id(const gmx_gpu_info_t *,
442 const gmx_gpu_opt_t *gpu_opt,
443 int idx)
445 assert(gpu_opt);
446 assert(idx >= 0 && idx < gpu_opt->n_dev_use);
448 return gpu_opt->dev_use[idx];
451 //! This function is documented in the header file
452 char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
453 const gmx_gpu_opt_t *gpu_opt,
454 int idx)
456 assert(gpu_info);
457 assert(gpu_opt);
458 assert(idx >= 0 && idx < gpu_opt->n_dev_use);
460 return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].device_name;
463 //! This function is documented in the header file
464 size_t sizeof_gpu_dev_info(void)
466 return sizeof(gmx_device_info_t);
469 /*! \brief Prints the name of a kernel function pointer.
471 * \param[in] kernel OpenCL kernel
472 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
474 cl_int dbg_ocl_kernel_name(const cl_kernel kernel)
476 cl_int cl_error;
477 char kernel_name[256];
478 cl_error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
479 sizeof(kernel_name), &kernel_name, NULL);
480 if (cl_error)
482 printf("No kernel found!\n");
484 else
486 printf("%s\n", kernel_name);
488 return cl_error;
491 /*! \brief Prints the name of a kernel function pointer.
493 * \param[in] kernel OpenCL kernel
494 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
496 cl_int dbg_ocl_kernel_name_address(void* kernel)
498 cl_int cl_error;
499 char kernel_name[256];
500 cl_error = clGetKernelInfo((cl_kernel)kernel, CL_KERNEL_FUNCTION_NAME,
501 sizeof(kernel_name), &kernel_name, NULL);
502 if (cl_error)
504 printf("No kernel found!\n");
506 else
508 printf("%s\n", kernel_name);
510 return cl_error;
513 void gpu_set_host_malloc_and_free(bool bUseGpuKernels,
514 gmx_host_alloc_t **nb_alloc,
515 gmx_host_free_t **nb_free)
517 if (bUseGpuKernels)
519 *nb_alloc = &ocl_pmalloc;
520 *nb_free = &ocl_pfree;
522 else
524 *nb_alloc = NULL;
525 *nb_free = NULL;