/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *  \brief Define functions for detection and initialization for OpenCL devices.
 *
 *  \author Anca Hamuraru <anca@streamcomputing.eu>
 *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
 *  \author Teemu Virolainen <teemu@streamcomputing.eu>
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __APPLE__
#    include <sys/sysctl.h>
#endif

#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
#include "gromacs/gmxlib/gpu_utils/ocl_compiler.h"
#include "gromacs/gmxlib/ocl_tools/oclutils.h"
#include "gromacs/legacyheaders/types/enums.h"
#include "gromacs/legacyheaders/types/hw_info.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
/*! \brief Helper macro for error handling
 *
 * Evaluates the OpenCL call \p func exactly once. On failure the numeric
 * OpenCL error code is formatted into \p err_str and \p retval is set to -1;
 * on success \p err_str is left untouched and \p retval is set to 0.
 *
 * The expansion is a plain compound statement (not do/while(0)) because
 * call sites invoke the macro without a trailing semicolon.
 *
 * \param[in]  func     Expression yielding a cl_int status.
 * \param[out] err_str  Caller-provided buffer, large enough for the message.
 * \param[out] retval   Integer lvalue receiving 0 (success) or -1 (failure).
 */
#define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
        cl_int opencl_ret = (func); \
        if (CL_SUCCESS != opencl_ret) \
        { \
            sprintf((err_str), "OpenCL error %d", opencl_ret); \
            (retval) = -1; \
        } \
        else \
        { \
            (retval) = 0; \
        } \
}
77 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
79 * \param[in] stat GPU status.
80 * \returns true if the provided status is egpuCompatible, otherwise false.
82 static bool is_compatible_gpu(int stat
)
84 return (stat
== egpuCompatible
);
/*! \brief Return true unless executing on OS X earlier than 10.10.4
 *
 * Uses the BSD sysctl() interface to extract the kernel release string
 * ("major.minor...") and compares it against 14.4.
 *
 * If the kernel release cannot be queried or parsed, the version is treated
 * as not known to work and false is returned.
 *
 * \return true if the kernel version is 14.4 or later (= OS X version 10.10.4),
 *         or the OS is not OS X.
 */
static bool
runningOnWorkingOSXVersionForAmd()
{
#ifdef __APPLE__
    int    mib[2] = { CTL_KERN, KERN_OSRELEASE };
    char   kernelVersion[256];
    size_t len = sizeof(kernelVersion);

    if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0) != 0)
    {
        return false;
    }
    /* Make sure the string functions below always see a terminator */
    kernelVersion[sizeof(kernelVersion) - 1] = '\0';

    char *afterMajor = NULL;
    long  major      = strtol(kernelVersion, &afterMajor, 10);
    if (afterMajor == kernelVersion)
    {
        /* No digits at all: not a version string we understand */
        return false;
    }

    /* A missing ".minor" part is read as minor version 0 */
    long minor = 0;
    if ('.' == *afterMajor)
    {
        minor = strtol(afterMajor + 1, NULL, 10);
    }

    // Kernel 14.4 corresponds to OS X 10.10.4
    return (major > 14 || (major == 14 && minor >= 4));
#else
    return true;
#endif
}
117 /*! \brief Returns true if the gpu characterized by the device properties is
118 * supported by the native gpu acceleration.
119 * \returns true if the GPU properties passed indicate a compatible
120 * GPU, otherwise false.
122 static int is_gmx_supported_gpu_id(struct gmx_device_info_t
*ocl_gpu_device
)
124 /* Only AMD and NVIDIA GPUs are supported for now */
125 switch (ocl_gpu_device
->vendor_e
)
127 case OCL_VENDOR_NVIDIA
:
128 return egpuCompatible
;
130 return runningOnWorkingOSXVersionForAmd() ? egpuCompatible
: egpuIncompatible
;
132 return egpuIncompatible
;
137 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
139 * \param[in] vendor_name String with OpenCL vendor name.
140 * \returns ocl_vendor_id_t value for the input vendor_name
142 ocl_vendor_id_t
get_vendor_id(char *vendor_name
)
146 if (strstr(vendor_name
, "NVIDIA"))
148 return OCL_VENDOR_NVIDIA
;
151 if (strstr(vendor_name
, "AMD") ||
152 strstr(vendor_name
, "Advanced Micro Devices"))
154 return OCL_VENDOR_AMD
;
157 if (strstr(vendor_name
, "Intel"))
159 return OCL_VENDOR_INTEL
;
162 return OCL_VENDOR_UNKNOWN
;
166 //! This function is documented in the header file
167 int detect_gpus(gmx_gpu_info_t
*gpu_info
, char *err_str
)
170 cl_uint ocl_platform_count
;
171 cl_platform_id
*ocl_platform_ids
;
172 cl_device_type req_dev_type
= CL_DEVICE_TYPE_GPU
;
175 ocl_platform_ids
= NULL
;
177 if (getenv("GMX_OCL_FORCE_CPU") != NULL
)
179 req_dev_type
= CL_DEVICE_TYPE_CPU
;
184 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL
, &ocl_platform_count
), err_str
, retval
)
190 if (1 > ocl_platform_count
)
195 snew(ocl_platform_ids
, ocl_platform_count
);
197 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count
, ocl_platform_ids
, NULL
), err_str
, retval
)
203 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
205 cl_uint ocl_device_count
;
207 /* If requesting req_dev_type devices fails, just go to the next platform */
208 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, 0, NULL
, &ocl_device_count
))
213 if (1 <= ocl_device_count
)
215 gpu_info
->n_dev
+= ocl_device_count
;
219 if (1 > gpu_info
->n_dev
)
224 snew(gpu_info
->gpu_dev
, gpu_info
->n_dev
);
228 cl_device_id
*ocl_device_ids
;
230 snew(ocl_device_ids
, gpu_info
->n_dev
);
233 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
235 cl_uint ocl_device_count
;
237 /* If requesting req_dev_type devices fails, just go to the next platform */
238 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, gpu_info
->n_dev
, ocl_device_ids
, &ocl_device_count
))
243 if (1 > ocl_device_count
)
248 for (unsigned int j
= 0; j
< ocl_device_count
; j
++)
250 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_platform_id
= ocl_platform_ids
[i
];
251 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_device_id
= ocl_device_ids
[j
];
253 gpu_info
->gpu_dev
[device_index
].device_name
[0] = 0;
254 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_NAME
, sizeof(gpu_info
->gpu_dev
[device_index
].device_name
), gpu_info
->gpu_dev
[device_index
].device_name
, NULL
);
256 gpu_info
->gpu_dev
[device_index
].device_version
[0] = 0;
257 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VERSION
, sizeof(gpu_info
->gpu_dev
[device_index
].device_version
), gpu_info
->gpu_dev
[device_index
].device_version
, NULL
);
259 gpu_info
->gpu_dev
[device_index
].device_vendor
[0] = 0;
260 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VENDOR
, sizeof(gpu_info
->gpu_dev
[device_index
].device_vendor
), gpu_info
->gpu_dev
[device_index
].device_vendor
, NULL
);
262 gpu_info
->gpu_dev
[device_index
].compute_units
= 0;
263 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_COMPUTE_UNITS
, sizeof(gpu_info
->gpu_dev
[device_index
].compute_units
), &(gpu_info
->gpu_dev
[device_index
].compute_units
), NULL
);
265 gpu_info
->gpu_dev
[device_index
].adress_bits
= 0;
266 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_ADDRESS_BITS
, sizeof(gpu_info
->gpu_dev
[device_index
].adress_bits
), &(gpu_info
->gpu_dev
[device_index
].adress_bits
), NULL
);
268 gpu_info
->gpu_dev
[device_index
].vendor_e
= get_vendor_id(gpu_info
->gpu_dev
[device_index
].device_vendor
);
270 gpu_info
->gpu_dev
[device_index
].stat
= is_gmx_supported_gpu_id(gpu_info
->gpu_dev
+ device_index
);
272 if (egpuCompatible
== gpu_info
->gpu_dev
[device_index
].stat
)
274 gpu_info
->n_dev_compatible
++;
281 gpu_info
->n_dev
= device_index
;
283 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
284 // TODO: Sort devices based on performance.
285 if (0 < gpu_info
->n_dev
)
288 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
290 if (OCL_VENDOR_AMD
== gpu_info
->gpu_dev
[i
].vendor_e
)
296 gmx_device_info_t ocl_gpu_info
;
297 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
298 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
299 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
304 /* if more than 1 device left to be sorted */
305 if ((gpu_info
->n_dev
- 1 - last
) > 1)
307 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
309 if (OCL_VENDOR_NVIDIA
== gpu_info
->gpu_dev
[i
].vendor_e
)
315 gmx_device_info_t ocl_gpu_info
;
316 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
317 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
318 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
325 sfree(ocl_device_ids
);
331 sfree(ocl_platform_ids
);
336 //! This function is documented in the header file
337 void free_gpu_info(const gmx_gpu_info_t gmx_unused
*gpu_info
)
341 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
343 cl_int gmx_unused cl_error
;
345 if (gpu_info
->gpu_dev
[i
].context
)
347 cl_error
= clReleaseContext(gpu_info
->gpu_dev
[i
].context
);
348 gpu_info
->gpu_dev
[i
].context
= NULL
;
349 assert(CL_SUCCESS
== cl_error
);
352 if (gpu_info
->gpu_dev
[i
].program
)
354 cl_error
= clReleaseProgram(gpu_info
->gpu_dev
[i
].program
);
355 gpu_info
->gpu_dev
[i
].program
= NULL
;
356 assert(CL_SUCCESS
== cl_error
);
360 sfree(gpu_info
->gpu_dev
);
364 //! This function is documented in the header file
365 void pick_compatible_gpus(const gmx_gpu_info_t
*gpu_info
,
366 gmx_gpu_opt_t
*gpu_opt
)
372 /* gpu_dev/n_dev have to be either NULL/0 or not (NULL/0) */
373 assert((gpu_info
->n_dev
!= 0 ? 0 : 1) ^ (gpu_info
->gpu_dev
== NULL
? 0 : 1));
375 snew(compat
, gpu_info
->n_dev
);
377 for (i
= 0; i
< gpu_info
->n_dev
; i
++)
379 if (is_compatible_gpu(gpu_info
->gpu_dev
[i
].stat
))
382 compat
[ncompat
- 1] = i
;
386 gpu_opt
->n_dev_compatible
= ncompat
;
387 snew(gpu_opt
->dev_compatible
, ncompat
);
388 memcpy(gpu_opt
->dev_compatible
, compat
, ncompat
*sizeof(*compat
));
392 //! This function is documented in the header file
393 gmx_bool
check_selected_gpus(int *checkres
,
394 const gmx_gpu_info_t
*gpu_info
,
395 gmx_gpu_opt_t
*gpu_opt
)
402 assert(gpu_opt
->n_dev_use
>= 0);
404 if (gpu_opt
->n_dev_use
== 0)
409 assert(gpu_opt
->dev_use
);
411 /* we will assume that all GPUs requested are valid IDs,
412 otherwise we'll bail anyways */
415 for (i
= 0; i
< gpu_opt
->n_dev_use
; i
++)
417 id
= gpu_opt
->dev_use
[i
];
419 /* devices are stored in increasing order of IDs in gpu_dev */
420 gpu_opt
->dev_use
[i
] = id
;
422 checkres
[i
] = (id
>= gpu_info
->n_dev
) ?
423 egpuNonexistent
: gpu_info
->gpu_dev
[id
].stat
;
425 bAllOk
= bAllOk
&& is_compatible_gpu(checkres
[i
]);
431 //! This function is documented in the header file
432 void get_gpu_device_info_string(char gmx_unused
*s
, const gmx_gpu_info_t gmx_unused
*gpu_info
, int gmx_unused index
)
437 if (index
< 0 && index
>= gpu_info
->n_dev
)
442 gmx_device_info_t
*dinfo
= &gpu_info
->gpu_dev
[index
];
445 dinfo
->stat
== egpuCompatible
||
446 dinfo
->stat
== egpuIncompatible
;
450 sprintf(s
, "#%d: %s, stat: %s",
452 gpu_detect_res_str
[dinfo
->stat
]);
456 sprintf(s
, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
457 index
, dinfo
->device_name
, dinfo
->device_vendor
,
458 dinfo
->device_version
,
459 gpu_detect_res_str
[dinfo
->stat
]);
463 //! This function is documented in the header file
464 gmx_bool
init_gpu(FILE gmx_unused
*fplog
,
467 const gmx_gpu_info_t gmx_unused
*gpu_info
,
468 const gmx_gpu_opt_t
*gpu_opt
475 if (mygpu
< 0 || mygpu
>= gpu_opt
->n_dev_use
)
478 sprintf(sbuf
, "Trying to initialize an inexistent GPU: "
479 "there are %d %s-selected GPU(s), but #%d was requested.",
480 gpu_opt
->n_dev_use
, gpu_opt
->bUserSet
? "user" : "auto", mygpu
);
487 //! This function is documented in the header file
488 int get_gpu_device_id(const gmx_gpu_info_t
*,
489 const gmx_gpu_opt_t
*gpu_opt
,
493 assert(idx
>= 0 && idx
< gpu_opt
->n_dev_use
);
495 return gpu_opt
->dev_use
[idx
];
498 //! This function is documented in the header file
499 char* get_ocl_gpu_device_name(const gmx_gpu_info_t
*gpu_info
,
500 const gmx_gpu_opt_t
*gpu_opt
,
505 assert(idx
>= 0 && idx
< gpu_opt
->n_dev_use
);
507 return gpu_info
->gpu_dev
[gpu_opt
->dev_use
[idx
]].device_name
;
510 //! This function is documented in the header file
511 size_t sizeof_gpu_dev_info(void)
513 return sizeof(gmx_device_info_t
);
516 /*! \brief Prints the name of a kernel function pointer.
518 * \param[in] kernel OpenCL kernel
519 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
521 cl_int
dbg_ocl_kernel_name(const cl_kernel kernel
)
524 char kernel_name
[256];
525 cl_error
= clGetKernelInfo(kernel
, CL_KERNEL_FUNCTION_NAME
,
526 sizeof(kernel_name
), &kernel_name
, NULL
);
529 printf("No kernel found!\n");
533 printf("%s\n", kernel_name
);
538 /*! \brief Prints the name of a kernel function pointer.
540 * \param[in] kernel OpenCL kernel
541 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
543 cl_int
dbg_ocl_kernel_name_address(void* kernel
)
546 char kernel_name
[256];
547 cl_error
= clGetKernelInfo((cl_kernel
)kernel
, CL_KERNEL_FUNCTION_NAME
,
548 sizeof(kernel_name
), &kernel_name
, NULL
);
551 printf("No kernel found!\n");
555 printf("%s\n", kernel_name
);
560 void gpu_set_host_malloc_and_free(bool bUseGpuKernels
,
561 gmx_host_alloc_t
**nb_alloc
,
562 gmx_host_free_t
**nb_free
)
566 *nb_alloc
= &ocl_pmalloc
;
567 *nb_free
= &ocl_pfree
;