2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define functions for detection and initialization for OpenCL devices.
38 * \author Anca Hamuraru <anca@streamcomputing.eu>
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
50 # include <sys/sysctl.h>
55 #include "gromacs/gpu_utils/gpu_utils.h"
56 #include "gromacs/gpu_utils/ocl_compiler.h"
57 #include "gromacs/gpu_utils/oclutils.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/stringutil.h"
/*! \brief Return true if executing on compatible OS for AMD OpenCL.
 *
 * This is assumed to be true for OS X version of at least 10.10.4 and
 * all other OS flavors.
 *
 * Uses the BSD sysctl() interfaces to extract the kernel version.
 *
 * \return true if version is 14.4 or later (= OS X version 10.10.4),
 *         or OS is not Darwin. On Darwin, false is returned when the
 *         kernel version cannot be queried or parsed.
 */
static bool
runningOnCompatibleOSForAmd()
{
#ifdef __APPLE__
    int    mib[2];
    char   kernelVersion[256];
    // Reserve one byte so the result can always be null-terminated below.
    size_t len = sizeof(kernelVersion) - 1;

    mib[0] = CTL_KERN;
    mib[1] = KERN_OSRELEASE;

    if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0) != 0)
    {
        // The version is unknown, so compatibility cannot be confirmed.
        return false;
    }
    kernelVersion[len] = '\0';

    // The release string has the form "major.minor[.patch]"; parse the
    // components as integers instead of going through floating point.
    char      *afterMajor = NULL;
    const long major      = strtol(kernelVersion, &afterMajor, 10);
    if (afterMajor == kernelVersion)
    {
        // No digits at all: not a version string we understand.
        return false;
    }
    // A missing minor component counts as zero rather than being dereferenced.
    const long minor = (*afterMajor == '.') ? strtol(afterMajor + 1, NULL, 10) : 0;

    // Kernel 14.4 corresponds to OS X 10.10.4
    return (major > 14 || (major == 14 && minor >= 4));
#else
    return true;
#endif
}
98 /*! \brief Returns true if the gpu characterized by the device properties is
99 * supported by the native gpu acceleration.
100 * \returns true if the GPU properties passed indicate a compatible
101 * GPU, otherwise false.
103 static int is_gmx_supported_gpu_id(struct gmx_device_info_t
*ocl_gpu_device
)
105 if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL
)
107 return egpuCompatible
;
110 /* Only AMD and NVIDIA GPUs are supported for now */
111 switch (ocl_gpu_device
->vendor_e
)
113 case OCL_VENDOR_NVIDIA
:
114 return egpuCompatible
;
116 return runningOnCompatibleOSForAmd() ? egpuCompatible
: egpuIncompatible
;
118 return egpuIncompatible
;
123 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
125 * \param[in] vendor_name String with OpenCL vendor name.
126 * \returns ocl_vendor_id_t value for the input vendor_name
128 static ocl_vendor_id_t
get_vendor_id(char *vendor_name
)
132 if (strstr(vendor_name
, "NVIDIA"))
134 return OCL_VENDOR_NVIDIA
;
137 if (strstr(vendor_name
, "AMD") ||
138 strstr(vendor_name
, "Advanced Micro Devices"))
140 return OCL_VENDOR_AMD
;
143 if (strstr(vendor_name
, "Intel"))
145 return OCL_VENDOR_INTEL
;
148 return OCL_VENDOR_UNKNOWN
;
152 //! This function is documented in the header file
153 bool canDetectGpus(std::string
*errorMessage
)
155 cl_uint numPlatforms
;
156 cl_int status
= clGetPlatformIDs(0, nullptr, &numPlatforms
);
157 GMX_ASSERT(status
!= CL_INVALID_VALUE
, "Incorrect call of clGetPlatformIDs detected");
159 if (status
== CL_PLATFORM_NOT_FOUND_KHR
)
161 // No valid ICDs found
162 if (errorMessage
!= nullptr)
164 errorMessage
->assign("No valid OpenCL driver found");
169 GMX_RELEASE_ASSERT(status
== CL_SUCCESS
,
170 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %u: %s",
171 status
, ocl_get_error_string(status
).c_str()).c_str());
172 bool foundPlatform
= (numPlatforms
> 0);
173 if (!foundPlatform
&& errorMessage
!= nullptr)
175 errorMessage
->assign("No OpenCL platforms found even though the driver was valid");
177 return foundPlatform
;
180 //! This function is documented in the header file
181 void findGpus(gmx_gpu_info_t
*gpu_info
)
183 cl_uint ocl_platform_count
;
184 cl_platform_id
*ocl_platform_ids
;
185 cl_device_type req_dev_type
= CL_DEVICE_TYPE_GPU
;
187 ocl_platform_ids
= NULL
;
189 if (getenv("GMX_OCL_FORCE_CPU") != NULL
)
191 req_dev_type
= CL_DEVICE_TYPE_CPU
;
196 cl_int status
= clGetPlatformIDs(0, NULL
, &ocl_platform_count
);
197 if (CL_SUCCESS
!= status
)
199 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
200 status
) + ocl_get_error_string(status
)));
203 if (1 > ocl_platform_count
)
205 // TODO this should have a descriptive error message that we only support one OpenCL platform
209 snew(ocl_platform_ids
, ocl_platform_count
);
211 status
= clGetPlatformIDs(ocl_platform_count
, ocl_platform_ids
, NULL
);
212 if (CL_SUCCESS
!= status
)
214 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
215 status
) + ocl_get_error_string(status
)));
218 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
220 cl_uint ocl_device_count
;
222 /* If requesting req_dev_type devices fails, just go to the next platform */
223 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, 0, NULL
, &ocl_device_count
))
228 if (1 <= ocl_device_count
)
230 gpu_info
->n_dev
+= ocl_device_count
;
234 if (1 > gpu_info
->n_dev
)
239 snew(gpu_info
->gpu_dev
, gpu_info
->n_dev
);
243 cl_device_id
*ocl_device_ids
;
245 snew(ocl_device_ids
, gpu_info
->n_dev
);
248 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
250 cl_uint ocl_device_count
;
252 /* If requesting req_dev_type devices fails, just go to the next platform */
253 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, gpu_info
->n_dev
, ocl_device_ids
, &ocl_device_count
))
258 if (1 > ocl_device_count
)
263 for (unsigned int j
= 0; j
< ocl_device_count
; j
++)
265 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_platform_id
= ocl_platform_ids
[i
];
266 gpu_info
->gpu_dev
[device_index
].ocl_gpu_id
.ocl_device_id
= ocl_device_ids
[j
];
268 gpu_info
->gpu_dev
[device_index
].device_name
[0] = 0;
269 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_NAME
, sizeof(gpu_info
->gpu_dev
[device_index
].device_name
), gpu_info
->gpu_dev
[device_index
].device_name
, NULL
);
271 gpu_info
->gpu_dev
[device_index
].device_version
[0] = 0;
272 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VERSION
, sizeof(gpu_info
->gpu_dev
[device_index
].device_version
), gpu_info
->gpu_dev
[device_index
].device_version
, NULL
);
274 gpu_info
->gpu_dev
[device_index
].device_vendor
[0] = 0;
275 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VENDOR
, sizeof(gpu_info
->gpu_dev
[device_index
].device_vendor
), gpu_info
->gpu_dev
[device_index
].device_vendor
, NULL
);
277 gpu_info
->gpu_dev
[device_index
].compute_units
= 0;
278 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_COMPUTE_UNITS
, sizeof(gpu_info
->gpu_dev
[device_index
].compute_units
), &(gpu_info
->gpu_dev
[device_index
].compute_units
), NULL
);
280 gpu_info
->gpu_dev
[device_index
].adress_bits
= 0;
281 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_ADDRESS_BITS
, sizeof(gpu_info
->gpu_dev
[device_index
].adress_bits
), &(gpu_info
->gpu_dev
[device_index
].adress_bits
), NULL
);
283 gpu_info
->gpu_dev
[device_index
].vendor_e
= get_vendor_id(gpu_info
->gpu_dev
[device_index
].device_vendor
);
285 gpu_info
->gpu_dev
[device_index
].stat
= is_gmx_supported_gpu_id(gpu_info
->gpu_dev
+ device_index
);
287 if (egpuCompatible
== gpu_info
->gpu_dev
[device_index
].stat
)
289 gpu_info
->n_dev_compatible
++;
296 gpu_info
->n_dev
= device_index
;
298 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
299 // TODO: Sort devices based on performance.
300 if (0 < gpu_info
->n_dev
)
303 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
305 if (OCL_VENDOR_AMD
== gpu_info
->gpu_dev
[i
].vendor_e
)
311 gmx_device_info_t ocl_gpu_info
;
312 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
313 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
314 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
319 /* if more than 1 device left to be sorted */
320 if ((gpu_info
->n_dev
- 1 - last
) > 1)
322 for (int i
= 0; i
< gpu_info
->n_dev
; i
++)
324 if (OCL_VENDOR_NVIDIA
== gpu_info
->gpu_dev
[i
].vendor_e
)
330 gmx_device_info_t ocl_gpu_info
;
331 ocl_gpu_info
= gpu_info
->gpu_dev
[i
];
332 gpu_info
->gpu_dev
[i
] = gpu_info
->gpu_dev
[last
];
333 gpu_info
->gpu_dev
[last
] = ocl_gpu_info
;
340 sfree(ocl_device_ids
);
346 sfree(ocl_platform_ids
);
349 //! This function is documented in the header file
350 void free_gpu_info(const gmx_gpu_info_t gmx_unused
*gpu_info
)
352 if (gpu_info
== NULL
)
357 sfree(gpu_info
->gpu_dev
);
360 //! This function is documented in the header file
361 std::vector
<int> getCompatibleGpus(const gmx_gpu_info_t
&gpu_info
)
363 // Possible minor over-allocation here, but not important for anything
364 std::vector
<int> compatibleGpus
;
365 compatibleGpus
.reserve(gpu_info
.n_dev
);
366 for (int i
= 0; i
< gpu_info
.n_dev
; i
++)
368 assert(gpu_info
.gpu_dev
);
369 if (gpu_info
.gpu_dev
[i
].stat
== egpuCompatible
)
371 compatibleGpus
.push_back(i
);
374 return compatibleGpus
;
377 //! This function is documented in the header file
378 const char *getGpuCompatibilityDescription(const gmx_gpu_info_t
&gpu_info
,
381 return (index
>= gpu_info
.n_dev
?
382 gpu_detect_res_str
[egpuNonexistent
] :
383 gpu_detect_res_str
[gpu_info
.gpu_dev
[index
].stat
]);
386 //! This function is documented in the header file
387 void get_gpu_device_info_string(char *s
, const gmx_gpu_info_t
&gpu_info
, int index
)
391 if (index
< 0 && index
>= gpu_info
.n_dev
)
396 gmx_device_info_t
*dinfo
= &gpu_info
.gpu_dev
[index
];
399 dinfo
->stat
== egpuCompatible
||
400 dinfo
->stat
== egpuIncompatible
;
404 sprintf(s
, "#%d: %s, stat: %s",
406 gpu_detect_res_str
[dinfo
->stat
]);
410 sprintf(s
, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
411 index
, dinfo
->device_name
, dinfo
->device_vendor
,
412 dinfo
->device_version
,
413 gpu_detect_res_str
[dinfo
->stat
]);
417 //! This function is documented in the header file
418 void init_gpu(const gmx::MDLogger
& /*mdlog*/,
419 gmx_device_info_t
*deviceInfo
)
423 // If the device is NVIDIA, for safety reasons we disable the JIT
424 // caching as this is known to be broken at least until driver 364.19;
425 // the cache does not always get regenerated when the source code changes,
426 // e.g. if the path to the kernel sources remains the same
428 if (deviceInfo
->vendor_e
== OCL_VENDOR_NVIDIA
)
430 // Ignore return values, failing to set the variable does not mean
431 // that something will go wrong later.
433 _putenv("CUDA_CACHE_DISABLE=1");
435 // Don't override, maybe a dev is testing.
436 setenv("CUDA_CACHE_DISABLE", "1", 0);
441 //! This function is documented in the header file
442 gmx_device_info_t
*getDeviceInfo(const gmx_gpu_info_t
&gpu_info
,
445 if (deviceId
< 0 || deviceId
>= gpu_info
.n_dev
)
447 gmx_incons("Invalid GPU deviceId requested");
449 return &gpu_info
.gpu_dev
[deviceId
];
452 //! This function is documented in the header file
453 size_t sizeof_gpu_dev_info(void)
455 return sizeof(gmx_device_info_t
);
458 void gpu_set_host_malloc_and_free(bool bUseGpuKernels
,
459 gmx_host_alloc_t
**nb_alloc
,
460 gmx_host_free_t
**nb_free
)
464 *nb_alloc
= &ocl_pmalloc
;
465 *nb_free
= &ocl_pfree
;