src/gromacs/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  */
  42
  43 #include "gmxpre.h"
  44
  45 #include <assert.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #ifdef __APPLE__
  50 #    include <sys/sysctl.h>
  51 #endif
  52
  53 #include <memory.h>
  54
  55 #include "gromacs/gpu_utils/gpu_utils.h"
  56 #include "gromacs/gpu_utils/ocl_compiler.h"
  57 #include "gromacs/gpu_utils/oclutils.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/utility/cstringutil.h"
  60 #include "gromacs/utility/exceptions.h"
  61 #include "gromacs/utility/fatalerror.h"
  62 #include "gromacs/utility/smalloc.h"
  63 #include "gromacs/utility/stringutil.h"
  64
  65 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
  66  *
  67  * This is assumed to be true for OS X version of at least 10.10.4 and
  68  * all other OS flavors.
  69  *
  70  * Uses the BSD sysctl() interfaces to extract the kernel version.
  71  *
  72  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  73  *         or OS is not Darwin.
  74  */
  75 static bool
  76 runningOnCompatibleOSForAmd()
  77 {
  78 #ifdef __APPLE__
  79     int    mib[2];
  80     char   kernelVersion[256];
  81     size_t len = sizeof(kernelVersion);
  82
  83     mib[0] = CTL_KERN;
  84     mib[1] = KERN_OSRELEASE;
  85
  86     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
  87
  88     int major = strtod(kernelVersion, NULL);
  89     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
  90
  91     // Kernel 14.4 corresponds to OS X 10.10.4
  92     return (major > 14 || (major == 14 && minor >= 4));
  93 #else
  94     return true;
  95 #endif
  96 }
  97
  98 /*! \brief Returns true if the gpu characterized by the device properties is
  99  *  supported by the native gpu acceleration.
 100  * \returns             true if the GPU properties passed indicate a compatible
 101  *                      GPU, otherwise false.
 102  */
 103 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 104 {
 105     if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL)
 106     {
 107         return egpuCompatible;
 108     }
 109
 110     /* Only AMD and NVIDIA GPUs are supported for now */
 111     switch (ocl_gpu_device->vendor_e)
 112     {
 113         case OCL_VENDOR_NVIDIA:
 114             return egpuCompatible;
 115         case OCL_VENDOR_AMD:
 116             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
 117         default:
 118             return egpuIncompatible;
 119     }
 120 }
 121
 122
 123 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 124  *
 125  *  \param[in] vendor_name String with OpenCL vendor name.
 126  *  \returns               ocl_vendor_id_t value for the input vendor_name
 127  */
 128 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
 129 {
 130     if (vendor_name)
 131     {
 132         if (strstr(vendor_name, "NVIDIA"))
 133         {
 134             return OCL_VENDOR_NVIDIA;
 135         }
 136         else
 137         if (strstr(vendor_name, "AMD") ||
 138             strstr(vendor_name, "Advanced Micro Devices"))
 139         {
 140             return OCL_VENDOR_AMD;
 141         }
 142         else
 143         if (strstr(vendor_name, "Intel"))
 144         {
 145             return OCL_VENDOR_INTEL;
 146         }
 147     }
 148     return OCL_VENDOR_UNKNOWN;
 149 }
 150
 151
 152 //! This function is documented in the header file
 153 bool canDetectGpus(std::string *errorMessage)
 154 {
 155     cl_uint numPlatforms;
 156     cl_int  status       = clGetPlatformIDs(0, nullptr, &numPlatforms);
 157     GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
 158 #ifdef cl_khr_icd
 159     if (status == CL_PLATFORM_NOT_FOUND_KHR)
 160     {
 161         // No valid ICDs found
 162         if (errorMessage != nullptr)
 163         {
 164             errorMessage->assign("No valid OpenCL driver found");
 165         }
 166         return false;
 167     }
 168 #endif
 169     GMX_RELEASE_ASSERT(status == CL_SUCCESS,
 170                        gmx::formatString("An unexpected value was returned from clGetPlatformIDs %u: %s",
 171                                          status, ocl_get_error_string(status).c_str()).c_str());
 172     bool foundPlatform = (numPlatforms > 0);
 173     if (!foundPlatform && errorMessage != nullptr)
 174     {
 175         errorMessage->assign("No OpenCL platforms found even though the driver was valid");
 176     }
 177     return foundPlatform;
 178 }
 179
 180 //! This function is documented in the header file
 181 void findGpus(gmx_gpu_info_t *gpu_info)
 182 {
 183     cl_uint         ocl_platform_count;
 184     cl_platform_id *ocl_platform_ids;
 185     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 186
 187     ocl_platform_ids = NULL;
 188
 189     if (getenv("GMX_OCL_FORCE_CPU") != NULL)
 190     {
 191         req_dev_type = CL_DEVICE_TYPE_CPU;
 192     }
 193
 194     while (1)
 195     {
 196         cl_int status = clGetPlatformIDs(0, NULL, &ocl_platform_count);
 197         if (CL_SUCCESS != status)
 198         {
 199             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
 200                                                            status) + ocl_get_error_string(status)));
 201         }
 202
 203         if (1 > ocl_platform_count)
 204         {
 205             // TODO this should have a descriptive error message that we only support one OpenCL platform
 206             break;
 207         }
 208
 209         snew(ocl_platform_ids, ocl_platform_count);
 210
 211         status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL);
 212         if (CL_SUCCESS != status)
 213         {
 214             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
 215                                                            status) + ocl_get_error_string(status)));
 216         }
 217
 218         for (unsigned int i = 0; i < ocl_platform_count; i++)
 219         {
 220             cl_uint ocl_device_count;
 221
 222             /* If requesting req_dev_type devices fails, just go to the next platform */
 223             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
 224             {
 225                 continue;
 226             }
 227
 228             if (1 <= ocl_device_count)
 229             {
 230                 gpu_info->n_dev += ocl_device_count;
 231             }
 232         }
 233
 234         if (1 > gpu_info->n_dev)
 235         {
 236             break;
 237         }
 238
 239         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 240
 241         {
 242             int           device_index;
 243             cl_device_id *ocl_device_ids;
 244
 245             snew(ocl_device_ids, gpu_info->n_dev);
 246             device_index = 0;
 247
 248             for (unsigned int i = 0; i < ocl_platform_count; i++)
 249             {
 250                 cl_uint ocl_device_count;
 251
 252                 /* If requesting req_dev_type devices fails, just go to the next platform */
 253                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 254                 {
 255                     continue;
 256                 }
 257
 258                 if (1 > ocl_device_count)
 259                 {
 260                     break;
 261                 }
 262
 263                 for (unsigned int j = 0; j < ocl_device_count; j++)
 264                 {
 265                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 266                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 267
 268                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 269                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
 270
 271                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 272                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
 273
 274                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 275                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
 276
 277                     gpu_info->gpu_dev[device_index].compute_units = 0;
 278                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
 279
 280                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 281                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
 282
 283                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 284
 285                     gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
 286
 287                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 288                     {
 289                         gpu_info->n_dev_compatible++;
 290                     }
 291
 292                     device_index++;
 293                 }
 294             }
 295
 296             gpu_info->n_dev = device_index;
 297
 298             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 299             // TODO: Sort devices based on performance.
 300             if (0 < gpu_info->n_dev)
 301             {
 302                 int last = -1;
 303                 for (int i = 0; i < gpu_info->n_dev; i++)
 304                 {
 305                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 306                     {
 307                         last++;
 308
 309                         if (last < i)
 310                         {
 311                             gmx_device_info_t ocl_gpu_info;
 312                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 313                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 314                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 315                         }
 316                     }
 317                 }
 318
 319                 /* if more than 1 device left to be sorted */
 320                 if ((gpu_info->n_dev - 1 - last) > 1)
 321                 {
 322                     for (int i = 0; i < gpu_info->n_dev; i++)
 323                     {
 324                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 325                         {
 326                             last++;
 327
 328                             if (last < i)
 329                             {
 330                                 gmx_device_info_t ocl_gpu_info;
 331                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 332                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 333                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 334                             }
 335                         }
 336                     }
 337                 }
 338             }
 339
 340             sfree(ocl_device_ids);
 341         }
 342
 343         break;
 344     }
 345
 346     sfree(ocl_platform_ids);
 347 }
 348
 349 //! This function is documented in the header file
 350 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 351 {
 352     if (gpu_info == NULL)
 353     {
 354         return;
 355     }
 356
 357     sfree(gpu_info->gpu_dev);
 358 }
 359
 360 //! This function is documented in the header file
 361 std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
 362 {
 363     // Possible minor over-allocation here, but not important for anything
 364     std::vector<int> compatibleGpus;
 365     compatibleGpus.reserve(gpu_info.n_dev);
 366     for (int i = 0; i < gpu_info.n_dev; i++)
 367     {
 368         assert(gpu_info.gpu_dev);
 369         if (gpu_info.gpu_dev[i].stat == egpuCompatible)
 370         {
 371             compatibleGpus.push_back(i);
 372         }
 373     }
 374     return compatibleGpus;
 375 }
 376
 377 //! This function is documented in the header file
 378 const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &gpu_info,
 379                                            int                   index)
 380 {
 381     return (index >= gpu_info.n_dev ?
 382             gpu_detect_res_str[egpuNonexistent] :
 383             gpu_detect_res_str[gpu_info.gpu_dev[index].stat]);
 384 }
 385
 386 //! This function is documented in the header file
 387 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int index)
 388 {
 389     assert(s);
 390
 391     if (index < 0 && index >= gpu_info.n_dev)
 392     {
 393         return;
 394     }
 395
 396     gmx_device_info_t  *dinfo = &gpu_info.gpu_dev[index];
 397
 398     bool                bGpuExists =
 399         dinfo->stat == egpuCompatible ||
 400         dinfo->stat == egpuIncompatible;
 401
 402     if (!bGpuExists)
 403     {
 404         sprintf(s, "#%d: %s, stat: %s",
 405                 index, "N/A",
 406                 gpu_detect_res_str[dinfo->stat]);
 407     }
 408     else
 409     {
 410         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 411                 index, dinfo->device_name, dinfo->device_vendor,
 412                 dinfo->device_version,
 413                 gpu_detect_res_str[dinfo->stat]);
 414     }
 415 }
 416
 417 //! This function is documented in the header file
 418 void init_gpu(const gmx::MDLogger               & /*mdlog*/,
 419               gmx_device_info_t                *deviceInfo)
 420 {
 421     assert(deviceInfo);
 422
 423     // If the device is NVIDIA, for safety reasons we disable the JIT
 424     // caching as this is known to be broken at least until driver 364.19;
 425     // the cache does not always get regenerated when the source code changes,
 426     // e.g. if the path to the kernel sources remains the same
 427
 428     if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
 429     {
 430         // Ignore return values, failing to set the variable does not mean
 431         // that something will go wrong later.
 432 #ifdef _MSC_VER
 433         _putenv("CUDA_CACHE_DISABLE=1");
 434 #else
 435         // Don't override, maybe a dev is testing.
 436         setenv("CUDA_CACHE_DISABLE", "1", 0);
 437 #endif
 438     }
 439 }
 440
 441 //! This function is documented in the header file
 442 gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
 443                                  int                   deviceId)
 444 {
 445     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
 446     {
 447         gmx_incons("Invalid GPU deviceId requested");
 448     }
 449     return &gpu_info.gpu_dev[deviceId];
 450 }
 451
 452 //! This function is documented in the header file
 453 size_t sizeof_gpu_dev_info(void)
 454 {
 455     return sizeof(gmx_device_info_t);
 456 }
 457
 458 void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
 459                                   gmx_host_alloc_t **nb_alloc,
 460                                   gmx_host_free_t  **nb_free)
 461 {
 462     if (bUseGpuKernels)
 463     {
 464         *nb_alloc = &ocl_pmalloc;
 465         *nb_free  = &ocl_pfree;
 466     }
 467     else
 468     {
 469         *nb_alloc = NULL;
 470         *nb_free  = NULL;
 471     }
 472 }