src/gromacs/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  */
  42
  43 #include "gmxpre.h"
  44
  45 #include <assert.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #ifdef __APPLE__
  50 #    include <sys/sysctl.h>
  51 #endif
  52
  53 #include <memory.h>
  54
  55 #include "gromacs/gpu_utils/gpu_utils.h"
  56 #include "gromacs/gpu_utils/ocl_compiler.h"
  57 #include "gromacs/gpu_utils/oclutils.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdtypes/md_enums.h"
  60 #include "gromacs/utility/cstringutil.h"
  61 #include "gromacs/utility/fatalerror.h"
  62 #include "gromacs/utility/smalloc.h"
  63
  64 /*! \brief Helper macro for error handling */
  65 #define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
  66         cl_int opencl_ret = func; \
  67         if (CL_SUCCESS != opencl_ret) \
  68         { \
  69             sprintf(err_str, "OpenCL error %d", opencl_ret); \
  70             retval = -1; \
  71         } \
  72         else{ \
  73             retval = 0; } \
  74 }
  75
  76
  77 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
  78  *
  79  * This is assumed to be true for OS X version of at least 10.10.4 and
  80  * all other OS flavors.
  81  *
  82  * Uses the BSD sysctl() interfaces to extract the kernel version.
  83  *
  84  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  85  *         or OS is not Darwin.
  86  */
  87 static bool
  88 runningOnCompatibleOSForAmd()
  89 {
  90 #ifdef __APPLE__
  91     int    mib[2];
  92     char   kernelVersion[256];
  93     size_t len = sizeof(kernelVersion);
  94
  95     mib[0] = CTL_KERN;
  96     mib[1] = KERN_OSRELEASE;
  97
  98     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
  99
 100     int major = strtod(kernelVersion, NULL);
 101     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
 102
 103     // Kernel 14.4 corresponds to OS X 10.10.4
 104     return (major > 14 || (major == 14 && minor >= 4));
 105 #else
 106     return true;
 107 #endif
 108 }
 109
 110 /*! \brief Returns true if the gpu characterized by the device properties is
 111  *  supported by the native gpu acceleration.
 112  * \returns             true if the GPU properties passed indicate a compatible
 113  *                      GPU, otherwise false.
 114  */
 115 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 116 {
 117     if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL)
 118     {
 119         return egpuCompatible;
 120     }
 121
 122     /* Only AMD and NVIDIA GPUs are supported for now */
 123     switch (ocl_gpu_device->vendor_e)
 124     {
 125         case OCL_VENDOR_NVIDIA:
 126             return egpuCompatible;
 127         case OCL_VENDOR_AMD:
 128             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
 129         default:
 130             return egpuIncompatible;
 131     }
 132 }
 133
 134
 135 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 136  *
 137  *  \param[in] vendor_name String with OpenCL vendor name.
 138  *  \returns               ocl_vendor_id_t value for the input vendor_name
 139  */
 140 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
 141 {
 142     if (vendor_name)
 143     {
 144         if (strstr(vendor_name, "NVIDIA"))
 145         {
 146             return OCL_VENDOR_NVIDIA;
 147         }
 148         else
 149         if (strstr(vendor_name, "AMD") ||
 150             strstr(vendor_name, "Advanced Micro Devices"))
 151         {
 152             return OCL_VENDOR_AMD;
 153         }
 154         else
 155         if (strstr(vendor_name, "Intel"))
 156         {
 157             return OCL_VENDOR_INTEL;
 158         }
 159     }
 160     return OCL_VENDOR_UNKNOWN;
 161 }
 162
 163
 164 //! This function is documented in the header file
 165 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 166 {
 167     int             retval;
 168     cl_uint         ocl_platform_count;
 169     cl_platform_id *ocl_platform_ids;
 170     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 171
 172     retval           = 0;
 173     ocl_platform_ids = NULL;
 174
 175     if (getenv("GMX_OCL_FORCE_CPU") != NULL)
 176     {
 177         req_dev_type = CL_DEVICE_TYPE_CPU;
 178     }
 179
 180     while (1)
 181     {
 182         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL, &ocl_platform_count), err_str, retval)
 183         if (0 != retval)
 184         {
 185             break;
 186         }
 187
 188         if (1 > ocl_platform_count)
 189         {
 190             break;
 191         }
 192
 193         snew(ocl_platform_ids, ocl_platform_count);
 194
 195         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL), err_str, retval)
 196         if (0 != retval)
 197         {
 198             break;
 199         }
 200
 201         for (unsigned int i = 0; i < ocl_platform_count; i++)
 202         {
 203             cl_uint ocl_device_count;
 204
 205             /* If requesting req_dev_type devices fails, just go to the next platform */
 206             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
 207             {
 208                 continue;
 209             }
 210
 211             if (1 <= ocl_device_count)
 212             {
 213                 gpu_info->n_dev += ocl_device_count;
 214             }
 215         }
 216
 217         if (1 > gpu_info->n_dev)
 218         {
 219             break;
 220         }
 221
 222         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 223
 224         {
 225             int           device_index;
 226             cl_device_id *ocl_device_ids;
 227
 228             snew(ocl_device_ids, gpu_info->n_dev);
 229             device_index = 0;
 230
 231             for (unsigned int i = 0; i < ocl_platform_count; i++)
 232             {
 233                 cl_uint ocl_device_count;
 234
 235                 /* If requesting req_dev_type devices fails, just go to the next platform */
 236                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 237                 {
 238                     continue;
 239                 }
 240
 241                 if (1 > ocl_device_count)
 242                 {
 243                     break;
 244                 }
 245
 246                 for (unsigned int j = 0; j < ocl_device_count; j++)
 247                 {
 248                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 249                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 250
 251                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 252                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
 253
 254                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 255                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
 256
 257                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 258                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
 259
 260                     gpu_info->gpu_dev[device_index].compute_units = 0;
 261                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
 262
 263                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 264                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
 265
 266                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 267
 268                     gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
 269
 270                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 271                     {
 272                         gpu_info->n_dev_compatible++;
 273                     }
 274
 275                     device_index++;
 276                 }
 277             }
 278
 279             gpu_info->n_dev = device_index;
 280
 281             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 282             // TODO: Sort devices based on performance.
 283             if (0 < gpu_info->n_dev)
 284             {
 285                 int last = -1;
 286                 for (int i = 0; i < gpu_info->n_dev; i++)
 287                 {
 288                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 289                     {
 290                         last++;
 291
 292                         if (last < i)
 293                         {
 294                             gmx_device_info_t ocl_gpu_info;
 295                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 296                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 297                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 298                         }
 299                     }
 300                 }
 301
 302                 /* if more than 1 device left to be sorted */
 303                 if ((gpu_info->n_dev - 1 - last) > 1)
 304                 {
 305                     for (int i = 0; i < gpu_info->n_dev; i++)
 306                     {
 307                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 308                         {
 309                             last++;
 310
 311                             if (last < i)
 312                             {
 313                                 gmx_device_info_t ocl_gpu_info;
 314                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 315                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 316                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 317                             }
 318                         }
 319                     }
 320                 }
 321             }
 322
 323             sfree(ocl_device_ids);
 324         }
 325
 326         break;
 327     }
 328
 329     sfree(ocl_platform_ids);
 330
 331     return retval;
 332 }
 333
 334 //! This function is documented in the header file
 335 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 336 {
 337     if (gpu_info == NULL)
 338     {
 339         return;
 340     }
 341
 342     sfree(gpu_info->gpu_dev);
 343 }
 344
 345 //! This function is documented in the header file
 346 bool isGpuCompatible(const gmx_gpu_info_t *gpu_info,
 347                      int                   index)
 348 {
 349     assert(gpu_info);
 350
 351     return (index >= gpu_info->n_dev ?
 352             false :
 353             gpu_info->gpu_dev[index].stat == egpuCompatible);
 354 }
 355
 356 //! This function is documented in the header file
 357 const char *getGpuCompatibilityDescription(const gmx_gpu_info_t *gpu_info,
 358                                            int                   index)
 359 {
 360     assert(gpu_info);
 361
 362     return (index >= gpu_info->n_dev ?
 363             gpu_detect_res_str[egpuNonexistent] :
 364             gpu_detect_res_str[gpu_info->gpu_dev[index].stat]);
 365 }
 366
 367 //! This function is documented in the header file
 368 void get_gpu_device_info_string(char gmx_unused *s, const gmx_gpu_info_t gmx_unused *gpu_info, int gmx_unused index)
 369 {
 370     assert(s);
 371     assert(gpu_info);
 372
 373     if (index < 0 && index >= gpu_info->n_dev)
 374     {
 375         return;
 376     }
 377
 378     gmx_device_info_t  *dinfo = &gpu_info->gpu_dev[index];
 379
 380     bool                bGpuExists =
 381         dinfo->stat == egpuCompatible ||
 382         dinfo->stat == egpuIncompatible;
 383
 384     if (!bGpuExists)
 385     {
 386         sprintf(s, "#%d: %s, stat: %s",
 387                 index, "N/A",
 388                 gpu_detect_res_str[dinfo->stat]);
 389     }
 390     else
 391     {
 392         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 393                 index, dinfo->device_name, dinfo->device_vendor,
 394                 dinfo->device_version,
 395                 gpu_detect_res_str[dinfo->stat]);
 396     }
 397 }
 398
 399 //! This function is documented in the header file
 400 gmx_bool init_gpu(const gmx::MDLogger              & /*mdlog*/,
 401                   int                              mygpu,
 402                   char                            *result_str,
 403                   const gmx_gpu_info_t gmx_unused *gpu_info,
 404                   const gmx_gpu_opt_t             *gpu_opt
 405                   )
 406 {
 407     assert(result_str);
 408
 409     result_str[0] = 0;
 410
 411     if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
 412     {
 413         char        sbuf[STRLEN];
 414         sprintf(sbuf, "Trying to initialize an non-existent GPU: "
 415                 "there are %d selected GPU(s), but #%d was requested.",
 416                 gpu_opt->n_dev_use, mygpu);
 417         gmx_incons(sbuf);
 418     }
 419
 420     // If the device is NVIDIA, for safety reasons we disable the JIT
 421     // caching as this is known to be broken at least until driver 364.19;
 422     // the cache does not always get regenerated when the source code changes,
 423     // e.g. if the path to the kernel sources remains the same
 424
 425     if (gpu_info->gpu_dev[mygpu].vendor_e == OCL_VENDOR_NVIDIA)
 426     {
 427         // Ignore return values, failing to set the variable does not mean
 428         // that something will go wrong later.
 429 #ifdef _MSC_VER
 430         _putenv("CUDA_CACHE_DISABLE=1");
 431 #else
 432         // Don't override, maybe a dev is testing.
 433         setenv("CUDA_CACHE_DISABLE", "1", 0);
 434 #endif
 435     }
 436
 437     return TRUE;
 438 }
 439
 440 //! This function is documented in the header file
 441 int get_gpu_device_id(const gmx_gpu_info_t  *,
 442                       const gmx_gpu_opt_t  *gpu_opt,
 443                       int                   idx)
 444 {
 445     assert(gpu_opt);
 446     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 447
 448     return gpu_opt->dev_use[idx];
 449 }
 450
 451 //! This function is documented in the header file
 452 char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
 453                               const gmx_gpu_opt_t  *gpu_opt,
 454                               int                   idx)
 455 {
 456     assert(gpu_info);
 457     assert(gpu_opt);
 458     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 459
 460     return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].device_name;
 461 }
 462
 463 //! This function is documented in the header file
 464 size_t sizeof_gpu_dev_info(void)
 465 {
 466     return sizeof(gmx_device_info_t);
 467 }
 468
 469 /*! \brief Prints the name of a kernel function pointer.
 470  *
 471  * \param[in]    kernel   OpenCL kernel
 472  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 473  */
 474 cl_int dbg_ocl_kernel_name(const cl_kernel kernel)
 475 {
 476     cl_int cl_error;
 477     char   kernel_name[256];
 478     cl_error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
 479                                sizeof(kernel_name), &kernel_name, NULL);
 480     if (cl_error)
 481     {
 482         printf("No kernel found!\n");
 483     }
 484     else
 485     {
 486         printf("%s\n", kernel_name);
 487     }
 488     return cl_error;
 489 }
 490
 491 /*! \brief Prints the name of a kernel function pointer.
 492  *
 493  * \param[in]    kernel   OpenCL kernel
 494  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 495  */
 496 cl_int dbg_ocl_kernel_name_address(void* kernel)
 497 {
 498     cl_int cl_error;
 499     char   kernel_name[256];
 500     cl_error = clGetKernelInfo((cl_kernel)kernel, CL_KERNEL_FUNCTION_NAME,
 501                                sizeof(kernel_name), &kernel_name, NULL);
 502     if (cl_error)
 503     {
 504         printf("No kernel found!\n");
 505     }
 506     else
 507     {
 508         printf("%s\n", kernel_name);
 509     }
 510     return cl_error;
 511 }
 512
 513 void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
 514                                   gmx_host_alloc_t **nb_alloc,
 515                                   gmx_host_free_t  **nb_free)
 516 {
 517     if (bUseGpuKernels)
 518     {
 519         *nb_alloc = &ocl_pmalloc;
 520         *nb_free  = &ocl_pfree;
 521     }
 522     else
 523     {
 524         *nb_alloc = NULL;
 525         *nb_free  = NULL;
 526     }
 527 }