2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Define functions for detection and initialization for OpenCL devices.
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
41 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \author Szilárd Páll <pall.szilard@gmail.com>
57 # include <sys/sysctl.h>
62 #include "gromacs/gpu_utils/gpu_utils.h"
63 #include "gromacs/gpu_utils/ocl_compiler.h"
64 #include "gromacs/gpu_utils/oclraii.h"
65 #include "gromacs/gpu_utils/oclutils.h"
66 #include "gromacs/hardware/hw_info.h"
67 #include "gromacs/utility/cstringutil.h"
68 #include "gromacs/utility/exceptions.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/smalloc.h"
71 #include "gromacs/utility/stringutil.h"
73 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
75 * This is assumed to be true for OS X version of at least 10.10.4 and
76 * all other OS flavors.
78 * Uses the BSD sysctl() interfaces to extract the kernel version.
80 * \return true if version is 14.4 or later (= OS X version 10.10.4),
81 * or OS is not Darwin.
83 static bool runningOnCompatibleOSForAmd()
87 char kernelVersion
[256];
88 size_t len
= sizeof(kernelVersion
);
91 mib
[1] = KERN_OSRELEASE
;
93 sysctl(mib
, sizeof(mib
) / sizeof(mib
[0]), kernelVersion
, &len
, NULL
, 0);
95 int major
= strtod(kernelVersion
, NULL
);
96 int minor
= strtod(strchr(kernelVersion
, '.') + 1, NULL
);
98 // Kernel 14.4 corresponds to OS X 10.10.4
99 return (major
> 14 || (major
== 14 && minor
>= 4));
108 /*! \brief Make an error string following an OpenCL API call.
110 * It is meant to be called with \p status != CL_SUCCESS, but it will
111 * work correctly even if it is called with no OpenCL failure.
113 * \param[in] message Supplies context, e.g. the name of the API call that returned the error.
114 * \param[in] status OpenCL API status code
115 * \returns A string describing the OpenCL error.
117 static std::string
makeOpenClInternalErrorString(const char* message
, cl_int status
)
119 if (message
!= nullptr)
121 return formatString("%s did %ssucceed %d: %s", message
, ((status
!= CL_SUCCESS
) ? "not " : ""),
122 status
, ocl_get_error_string(status
).c_str());
126 return formatString("%sOpenCL error encountered %d: %s", ((status
!= CL_SUCCESS
) ? "" : "No "),
127 status
, ocl_get_error_string(status
).c_str());
132 * \brief Checks that device \c devInfo is sane (ie can run a kernel).
134 * Compiles and runs a dummy kernel to determine whether the given
135 * OpenCL device functions properly.
138 * \param[in] devInfo The device info pointer.
139 * \param[out] errorMessage An error message related to a failing OpenCL API call.
140 * \throws std::bad_alloc When out of memory.
141 * \returns Whether the device passed sanity checks
143 static bool isDeviceSane(const gmx_device_info_t
* devInfo
, std::string
* errorMessage
)
145 cl_context_properties properties
[] = {
146 CL_CONTEXT_PLATFORM
, reinterpret_cast<cl_context_properties
>(devInfo
->ocl_gpu_id
.ocl_platform_id
), 0
148 // uncrustify spacing
151 auto deviceId
= devInfo
->ocl_gpu_id
.ocl_device_id
;
152 ClContext
context(clCreateContext(properties
, 1, &deviceId
, nullptr, nullptr, &status
));
153 if (status
!= CL_SUCCESS
)
155 errorMessage
->assign(makeOpenClInternalErrorString("clCreateContext", status
));
158 ClCommandQueue
commandQueue(clCreateCommandQueue(context
, deviceId
, 0, &status
));
159 if (status
!= CL_SUCCESS
)
161 errorMessage
->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status
));
165 // Some compilers such as Apple's require kernel functions to have at least one argument
166 const char* lines
[] = { "__kernel void dummyKernel(__global void* input){}" };
167 ClProgram
program(clCreateProgramWithSource(context
, 1, lines
, nullptr, &status
));
168 if (status
!= CL_SUCCESS
)
170 errorMessage
->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status
));
174 if ((status
= clBuildProgram(program
, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS
)
176 errorMessage
->assign(makeOpenClInternalErrorString("clBuildProgram", status
));
180 ClKernel
kernel(clCreateKernel(program
, "dummyKernel", &status
));
181 if (status
!= CL_SUCCESS
)
183 errorMessage
->assign(makeOpenClInternalErrorString("clCreateKernel", status
));
187 clSetKernelArg(kernel
, 0, sizeof(void*), nullptr);
189 const size_t localWorkSize
= 1, globalWorkSize
= 1;
190 if ((status
= clEnqueueNDRangeKernel(commandQueue
, kernel
, 1, nullptr, &globalWorkSize
,
191 &localWorkSize
, 0, nullptr, nullptr))
194 errorMessage
->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status
));
201 * \brief Checks that device \c devInfo is compatible with GROMACS.
203 * Vendor and OpenCL version support checks are executed an the result
206 * \param[in] devInfo The device info pointer.
207 * \returns The result of the compatibility checks.
209 static int isDeviceSupported(const gmx_device_info_t
* devInfo
)
211 if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
213 // Assume the device is compatible because checking has been disabled.
214 return egpuCompatible
;
217 // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
218 constexpr unsigned int minVersionMajor
= REQUIRED_OPENCL_MIN_VERSION_MAJOR
;
219 constexpr unsigned int minVersionMinor
= REQUIRED_OPENCL_MIN_VERSION_MINOR
;
221 // Based on the OpenCL spec we're checking the version supported by
222 // the device which has the following format:
223 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
224 unsigned int deviceVersionMinor
, deviceVersionMajor
;
225 const int valuesScanned
= std::sscanf(devInfo
->device_version
, "OpenCL %u.%u",
226 &deviceVersionMajor
, &deviceVersionMinor
);
227 const bool versionLargeEnough
=
228 ((valuesScanned
== 2)
229 && ((deviceVersionMajor
> minVersionMajor
)
230 || (deviceVersionMajor
== minVersionMajor
&& deviceVersionMinor
>= minVersionMinor
)));
231 if (!versionLargeEnough
)
233 return egpuIncompatible
;
236 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
237 switch (devInfo
->vendor_e
)
239 case OCL_VENDOR_NVIDIA
: return egpuCompatible
;
241 return runningOnCompatibleOSForAmd() ? egpuCompatible
: egpuIncompatible
;
242 case OCL_VENDOR_INTEL
:
243 return GMX_OPENCL_NB_CLUSTER_SIZE
== 4 ? egpuCompatible
: egpuIncompatibleClusterSize
;
244 default: return egpuIncompatible
;
249 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
251 * Runs sanity checks: checking that the runtime can compile a dummy kernel
252 * and this can be executed;
253 * Runs compatibility checks verifying the device OpenCL version requirement
254 * and vendor/OS support.
256 * \param[in] deviceId The runtime-reported numeric ID of the device.
257 * \param[in] deviceInfo The device info pointer.
258 * \returns An e_gpu_detect_res_t to indicate how the GPU coped with
259 * the sanity and compatibility check.
261 static int checkGpu(size_t deviceId
, const gmx_device_info_t
* deviceInfo
)
264 int supportStatus
= isDeviceSupported(deviceInfo
);
265 if (supportStatus
!= egpuCompatible
)
267 return supportStatus
;
270 std::string errorMessage
;
271 if (!isDeviceSane(deviceInfo
, &errorMessage
))
273 gmx_warning("While sanity checking device #%zu, %s", deviceId
, errorMessage
.c_str());
277 return egpuCompatible
;
282 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
284 * \param[in] vendor_name String with OpenCL vendor name.
285 * \returns ocl_vendor_id_t value for the input vendor_name
287 static ocl_vendor_id_t
get_vendor_id(char* vendor_name
)
291 if (strstr(vendor_name
, "NVIDIA"))
293 return OCL_VENDOR_NVIDIA
;
295 else if (strstr(vendor_name
, "AMD") || strstr(vendor_name
, "Advanced Micro Devices"))
297 return OCL_VENDOR_AMD
;
299 else if (strstr(vendor_name
, "Intel"))
301 return OCL_VENDOR_INTEL
;
304 return OCL_VENDOR_UNKNOWN
;
307 bool isGpuDetectionFunctional(std::string
* errorMessage
)
309 cl_uint numPlatforms
;
310 cl_int status
= clGetPlatformIDs(0, nullptr, &numPlatforms
);
311 GMX_ASSERT(status
!= CL_INVALID_VALUE
, "Incorrect call of clGetPlatformIDs detected");
313 if (status
== CL_PLATFORM_NOT_FOUND_KHR
)
315 // No valid ICDs found
316 if (errorMessage
!= nullptr)
318 errorMessage
->assign("No valid OpenCL driver found");
324 status
== CL_SUCCESS
,
325 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
326 status
, ocl_get_error_string(status
).c_str())
328 bool foundPlatform
= (numPlatforms
> 0);
329 if (!foundPlatform
&& errorMessage
!= nullptr)
331 errorMessage
->assign("No OpenCL platforms found even though the driver was valid");
333 return foundPlatform
;
336 void findGpus(gmx_gpu_info_t
* gpu_info
)
338 cl_uint ocl_platform_count
;
339 cl_platform_id
* ocl_platform_ids
;
340 cl_device_type req_dev_type
= CL_DEVICE_TYPE_GPU
;
342 ocl_platform_ids
= nullptr;
344 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
346 req_dev_type
= CL_DEVICE_TYPE_CPU
;
351 cl_int status
= clGetPlatformIDs(0, nullptr, &ocl_platform_count
);
352 if (CL_SUCCESS
!= status
)
354 GMX_THROW(gmx::InternalError(
355 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status
)
356 + ocl_get_error_string(status
)));
359 if (1 > ocl_platform_count
)
361 // TODO this should have a descriptive error message that we only support one OpenCL platform
365 snew(ocl_platform_ids
, ocl_platform_count
);
367 status
= clGetPlatformIDs(ocl_platform_count
, ocl_platform_ids
, nullptr);
368 if (CL_SUCCESS
!= status
)
370 GMX_THROW(gmx::InternalError(
371 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status
)
372 + ocl_get_error_string(status
)));
375 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
377 cl_uint ocl_device_count
;
379 /* If requesting req_dev_type devices fails, just go to the next platform */
380 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, 0, nullptr, &ocl_device_count
))
385 if (1 <= ocl_device_count
)
387 gpu_info
->n_dev
+= ocl_device_count
;
391 if (1 > gpu_info
->n_dev
)
396 snew(gpu_info
->gpu_dev
, gpu_info
->n_dev
);
400 cl_device_id
* ocl_device_ids
;
402 snew(ocl_device_ids
, gpu_info
->n_dev
);
405 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
407 cl_uint ocl_device_count
;
409 /* If requesting req_dev_type devices fails, just go to the next platform */
411 != clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, gpu_info
->n_dev
,
412 ocl_device_ids
, &ocl_device_count
))
417 if (1 > ocl_device_count
)
422 for (unsigned int j
= 0; j
< ocl_device_count
; j
++)
424 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_platform_id
= ocl_platform_ids
[i
];
425 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_device_id
= ocl_device_ids
[j
];
427 gpu_info
->gpu_dev
[device_index
].device_name
[0] = 0;
428 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_NAME
,
429 sizeof(gpu_info
->gpu_dev
[device_index
].device_name
),
430 gpu_info
->gpu_dev
[device_index
].device_name
, nullptr);
432 gpu_info
->gpu_dev
[device_index
].device_version
[0] = 0;
433 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VERSION
,
434 sizeof(gpu_info
->gpu_dev
[device_index
].device_version
),
435 gpu_info
->gpu_dev
[device_index
].device_version
, nullptr);
437 gpu_info
->gpu_dev
[device_index
].device_vendor
[0] = 0;
438 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VENDOR
,
439 sizeof(gpu_info
->gpu_dev
[device_index
].device_vendor
),
440 gpu_info
->gpu_dev
[device_index
].device_vendor
, nullptr);
442 gpu_info
->gpu_dev
[device_index
].compute_units
= 0;
443 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_COMPUTE_UNITS
,
444 sizeof(gpu_info
->gpu_dev
[device_index
].compute_units
),
445 &(gpu_info
->gpu_dev
[device_index
].compute_units
), nullptr);
447 gpu_info
->gpu_dev
[device_index
].adress_bits
= 0;
448 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_ADDRESS_BITS
,
449 sizeof(gpu_info
->gpu_dev
[device_index
].adress_bits
),
450 &(gpu_info
->gpu_dev
[device_index
].adress_bits
), nullptr);
452 gpu_info
->gpu_dev
[device_index
].vendor_e
=
453 get_vendor_id(gpu_info
->gpu_dev
[device_index
].device_vendor
);
455 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_WORK_ITEM_SIZES
, 3 * sizeof(size_t),
456 &gpu_info
->gpu_dev
[device_index
].maxWorkItemSizes
, nullptr);
458 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_WORK_GROUP_SIZE
, sizeof(size_t),
459 &gpu_info
->gpu_dev
[device_index
].maxWorkGroupSize
, nullptr);
461 gpu_info
->gpu_dev
[device_index
].stat
=
462 gmx::checkGpu(device_index
, gpu_info
->gpu_dev
+ device_index
);
464 if (egpuCompatible
== gpu_info
->gpu_dev
[device_index
].stat
)
466 gpu_info
->n_dev_compatible
++;
473 gpu_info
->n_dev
= device_index
;
475 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
476 // TODO: Sort devices based on performance.
477 if (0 < gpu_info
->n_dev
)
480 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
482 if (OCL_VENDOR_AMD
== gpu_info
->gpu_dev
[i
].vendor_e
)
488 gmx_device_info_t ocl_gpu_info
;
489 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
490 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
491 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
496 /* if more than 1 device left to be sorted */
497 if ((gpu_info
->n_dev
- 1 - last
) > 1)
499 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
501 if (OCL_VENDOR_NVIDIA
== gpu_info
->gpu_dev
[i
].vendor_e
)
507 gmx_device_info_t ocl_gpu_info
;
508 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
509 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
510 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
517 sfree(ocl_device_ids
);
523 sfree(ocl_platform_ids
);
526 void get_gpu_device_info_string(char* s
, const gmx_gpu_info_t
& gpu_info
, int index
)
530 if (index
< 0 && index
>= gpu_info
.n_dev
)
535 gmx_device_info_t
* dinfo
= &gpu_info
.gpu_dev
[index
];
537 bool bGpuExists
= (dinfo
->stat
!= egpuNonexistent
&& dinfo
->stat
!= egpuInsane
);
541 sprintf(s
, "#%d: %s, stat: %s", index
, "N/A", gpu_detect_res_str
[dinfo
->stat
]);
545 sprintf(s
, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index
, dinfo
->device_name
,
546 dinfo
->device_vendor
, dinfo
->device_version
, gpu_detect_res_str
[dinfo
->stat
]);
551 void init_gpu(const gmx_device_info_t
* deviceInfo
)
555 // If the device is NVIDIA, for safety reasons we disable the JIT
556 // caching as this is known to be broken at least until driver 364.19;
557 // the cache does not always get regenerated when the source code changes,
558 // e.g. if the path to the kernel sources remains the same
560 if (deviceInfo
->vendor_e
== OCL_VENDOR_NVIDIA
)
562 // Ignore return values, failing to set the variable does not mean
563 // that something will go wrong later.
565 _putenv("CUDA_CACHE_DISABLE=1");
567 // Don't override, maybe a dev is testing.
568 setenv("CUDA_CACHE_DISABLE", "1", 0);
573 gmx_device_info_t
* getDeviceInfo(const gmx_gpu_info_t
& gpu_info
, int deviceId
)
575 if (deviceId
< 0 || deviceId
>= gpu_info
.n_dev
)
577 gmx_incons("Invalid GPU deviceId requested");
579 return &gpu_info
.gpu_dev
[deviceId
];
582 size_t sizeof_gpu_dev_info()
584 return sizeof(gmx_device_info_t
);
587 int gpu_info_get_stat(const gmx_gpu_info_t
& info
, int index
)
589 return info
.gpu_dev
[index
].stat
;