src/gromacs/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  */
  42
  43 #include "gmxpre.h"
  44
  45 #include <assert.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #ifdef __APPLE__
  50 #    include <sys/sysctl.h>
  51 #endif
  52
  53 #include <memory.h>
  54
  55 #include "gromacs/gpu_utils/gpu_utils.h"
  56 #include "gromacs/gpu_utils/ocl_compiler.h"
  57 #include "gromacs/gpu_utils/oclutils.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdtypes/md_enums.h"
  60 #include "gromacs/utility/cstringutil.h"
  61 #include "gromacs/utility/fatalerror.h"
  62 #include "gromacs/utility/smalloc.h"
  63
  64 /*! \brief Helper macro for error handling */
  65 #define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
  66         cl_int opencl_ret = func; \
  67         if (CL_SUCCESS != opencl_ret) \
  68         { \
  69             sprintf(err_str, "OpenCL error %d", opencl_ret); \
  70             retval = -1; \
  71         } \
  72         else{ \
  73             retval = 0; } \
  74 }
  75
  76
  77 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
  78  *
  79  * \param[in] stat  GPU status.
  80  * \returns         true if the provided status is egpuCompatible, otherwise false.
  81  */
  82 static bool is_compatible_gpu(int stat)
  83 {
  84     return (stat == egpuCompatible);
  85 }
  86
  87 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
  88  *
  89  * This is assumed to be true for OS X version of at least 10.10.4 and
  90  * all other OS flavors.
  91  *
  92  * Uses the BSD sysctl() interfaces to extract the kernel version.
  93  *
  94  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  95  *         or OS is not Darwin.
  96  */
  97 static bool
  98 runningOnCompatibleOSForAmd()
  99 {
 100 #ifdef __APPLE__
 101     int    mib[2];
 102     char   kernelVersion[256];
 103     size_t len = sizeof(kernelVersion);
 104
 105     mib[0] = CTL_KERN;
 106     mib[1] = KERN_OSRELEASE;
 107
 108     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
 109
 110     int major = strtod(kernelVersion, NULL);
 111     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
 112
 113     // Kernel 14.4 corresponds to OS X 10.10.4
 114     return (major > 14 || (major == 14 && minor >= 4));
 115 #else
 116     return true;
 117 #endif
 118 }
 119
 120 /*! \brief Returns true if the gpu characterized by the device properties is
 121  *  supported by the native gpu acceleration.
 122  * \returns             true if the GPU properties passed indicate a compatible
 123  *                      GPU, otherwise false.
 124  */
 125 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 126 {
 127     /* Only AMD and NVIDIA GPUs are supported for now */
 128     switch (ocl_gpu_device->vendor_e)
 129     {
 130         case OCL_VENDOR_NVIDIA:
 131             return egpuCompatible;
 132         case OCL_VENDOR_AMD:
 133             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
 134         default:
 135             return egpuIncompatible;
 136     }
 137 }
 138
 139
 140 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 141  *
 142  *  \param[in] vendor_name String with OpenCL vendor name.
 143  *  \returns               ocl_vendor_id_t value for the input vendor_name
 144  */
 145 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
 146 {
 147     if (vendor_name)
 148     {
 149         if (strstr(vendor_name, "NVIDIA"))
 150         {
 151             return OCL_VENDOR_NVIDIA;
 152         }
 153         else
 154         if (strstr(vendor_name, "AMD") ||
 155             strstr(vendor_name, "Advanced Micro Devices"))
 156         {
 157             return OCL_VENDOR_AMD;
 158         }
 159         else
 160         if (strstr(vendor_name, "Intel"))
 161         {
 162             return OCL_VENDOR_INTEL;
 163         }
 164     }
 165     return OCL_VENDOR_UNKNOWN;
 166 }
 167
 168
 169 //! This function is documented in the header file
 170 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 171 {
 172     int             retval;
 173     cl_uint         ocl_platform_count;
 174     cl_platform_id *ocl_platform_ids;
 175     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 176
 177     retval           = 0;
 178     ocl_platform_ids = NULL;
 179
 180     if (getenv("GMX_OCL_FORCE_CPU") != NULL)
 181     {
 182         req_dev_type = CL_DEVICE_TYPE_CPU;
 183     }
 184
 185     while (1)
 186     {
 187         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL, &ocl_platform_count), err_str, retval)
 188         if (0 != retval)
 189         {
 190             break;
 191         }
 192
 193         if (1 > ocl_platform_count)
 194         {
 195             break;
 196         }
 197
 198         snew(ocl_platform_ids, ocl_platform_count);
 199
 200         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL), err_str, retval)
 201         if (0 != retval)
 202         {
 203             break;
 204         }
 205
 206         for (unsigned int i = 0; i < ocl_platform_count; i++)
 207         {
 208             cl_uint ocl_device_count;
 209
 210             /* If requesting req_dev_type devices fails, just go to the next platform */
 211             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
 212             {
 213                 continue;
 214             }
 215
 216             if (1 <= ocl_device_count)
 217             {
 218                 gpu_info->n_dev += ocl_device_count;
 219             }
 220         }
 221
 222         if (1 > gpu_info->n_dev)
 223         {
 224             break;
 225         }
 226
 227         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 228
 229         {
 230             int           device_index;
 231             cl_device_id *ocl_device_ids;
 232
 233             snew(ocl_device_ids, gpu_info->n_dev);
 234             device_index = 0;
 235
 236             for (unsigned int i = 0; i < ocl_platform_count; i++)
 237             {
 238                 cl_uint ocl_device_count;
 239
 240                 /* If requesting req_dev_type devices fails, just go to the next platform */
 241                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 242                 {
 243                     continue;
 244                 }
 245
 246                 if (1 > ocl_device_count)
 247                 {
 248                     break;
 249                 }
 250
 251                 for (unsigned int j = 0; j < ocl_device_count; j++)
 252                 {
 253                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 254                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 255
 256                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 257                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
 258
 259                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 260                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
 261
 262                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 263                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
 264
 265                     gpu_info->gpu_dev[device_index].compute_units = 0;
 266                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
 267
 268                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 269                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
 270
 271                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 272
 273                     gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
 274
 275                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 276                     {
 277                         gpu_info->n_dev_compatible++;
 278                     }
 279
 280                     device_index++;
 281                 }
 282             }
 283
 284             gpu_info->n_dev = device_index;
 285
 286             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 287             // TODO: Sort devices based on performance.
 288             if (0 < gpu_info->n_dev)
 289             {
 290                 int last = -1;
 291                 for (int i = 0; i < gpu_info->n_dev; i++)
 292                 {
 293                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 294                     {
 295                         last++;
 296
 297                         if (last < i)
 298                         {
 299                             gmx_device_info_t ocl_gpu_info;
 300                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 301                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 302                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 303                         }
 304                     }
 305                 }
 306
 307                 /* if more than 1 device left to be sorted */
 308                 if ((gpu_info->n_dev - 1 - last) > 1)
 309                 {
 310                     for (int i = 0; i < gpu_info->n_dev; i++)
 311                     {
 312                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 313                         {
 314                             last++;
 315
 316                             if (last < i)
 317                             {
 318                                 gmx_device_info_t ocl_gpu_info;
 319                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 320                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 321                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 322                             }
 323                         }
 324                     }
 325                 }
 326             }
 327
 328             sfree(ocl_device_ids);
 329         }
 330
 331         break;
 332     }
 333
 334     sfree(ocl_platform_ids);
 335
 336     return retval;
 337 }
 338
 339 //! This function is documented in the header file
 340 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 341 {
 342     if (gpu_info == NULL)
 343     {
 344         return;
 345     }
 346
 347     sfree(gpu_info->gpu_dev);
 348 }
 349
 350 //! This function is documented in the header file
 351 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
 352                           gmx_gpu_opt_t        *gpu_opt)
 353 {
 354     int  i, ncompat;
 355     int *compat;
 356
 357     assert(gpu_info);
 358     /* gpu_dev/n_dev have to be either NULL/0 or not (NULL/0) */
 359     assert((gpu_info->n_dev != 0 ? 0 : 1) ^ (gpu_info->gpu_dev == NULL ? 0 : 1));
 360
 361     snew(compat, gpu_info->n_dev);
 362     ncompat = 0;
 363     for (i = 0; i < gpu_info->n_dev; i++)
 364     {
 365         if (is_compatible_gpu(gpu_info->gpu_dev[i].stat))
 366         {
 367             ncompat++;
 368             compat[ncompat - 1] = i;
 369         }
 370     }
 371
 372     gpu_opt->n_dev_compatible = ncompat;
 373     snew(gpu_opt->dev_compatible, ncompat);
 374     memcpy(gpu_opt->dev_compatible, compat, ncompat*sizeof(*compat));
 375     sfree(compat);
 376 }
 377
 378 //! This function is documented in the header file
 379 gmx_bool check_selected_gpus(int                  *checkres,
 380                              const gmx_gpu_info_t *gpu_info,
 381                              gmx_gpu_opt_t        *gpu_opt)
 382 {
 383     int  i, id;
 384     bool bAllOk;
 385
 386     assert(checkres);
 387     assert(gpu_info);
 388     assert(gpu_opt->n_dev_use >= 0);
 389
 390     if (gpu_opt->n_dev_use == 0)
 391     {
 392         return TRUE;
 393     }
 394
 395     assert(gpu_opt->dev_use);
 396
 397     /* we will assume that all GPUs requested are valid IDs,
 398        otherwise we'll bail anyways */
 399
 400     bAllOk = true;
 401     for (i = 0; i < gpu_opt->n_dev_use; i++)
 402     {
 403         id = gpu_opt->dev_use[i];
 404
 405         /* devices are stored in increasing order of IDs in gpu_dev */
 406         gpu_opt->dev_use[i] = id;
 407
 408         checkres[i] = (id >= gpu_info->n_dev) ?
 409             egpuNonexistent : gpu_info->gpu_dev[id].stat;
 410
 411         bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
 412     }
 413
 414     return bAllOk;
 415 }
 416
 417 //! This function is documented in the header file
 418 void get_gpu_device_info_string(char gmx_unused *s, const gmx_gpu_info_t gmx_unused *gpu_info, int gmx_unused index)
 419 {
 420     assert(s);
 421     assert(gpu_info);
 422
 423     if (index < 0 && index >= gpu_info->n_dev)
 424     {
 425         return;
 426     }
 427
 428     gmx_device_info_t  *dinfo = &gpu_info->gpu_dev[index];
 429
 430     bool                bGpuExists =
 431         dinfo->stat == egpuCompatible ||
 432         dinfo->stat == egpuIncompatible;
 433
 434     if (!bGpuExists)
 435     {
 436         sprintf(s, "#%d: %s, stat: %s",
 437                 index, "N/A",
 438                 gpu_detect_res_str[dinfo->stat]);
 439     }
 440     else
 441     {
 442         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 443                 index, dinfo->device_name, dinfo->device_vendor,
 444                 dinfo->device_version,
 445                 gpu_detect_res_str[dinfo->stat]);
 446     }
 447 }
 448
 449 //! This function is documented in the header file
 450 gmx_bool init_gpu(const gmx::MDLogger              & /*mdlog*/,
 451                   int                              mygpu,
 452                   char                            *result_str,
 453                   const gmx_gpu_info_t gmx_unused *gpu_info,
 454                   const gmx_gpu_opt_t             *gpu_opt
 455                   )
 456 {
 457     assert(result_str);
 458
 459     result_str[0] = 0;
 460
 461     if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
 462     {
 463         char        sbuf[STRLEN];
 464         sprintf(sbuf, "Trying to initialize an non-existent GPU: "
 465                 "there are %d %s-selected GPU(s), but #%d was requested.",
 466                 gpu_opt->n_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
 467         gmx_incons(sbuf);
 468     }
 469
 470     // If the device is NVIDIA, for safety reasons we disable the JIT
 471     // caching as this is known to be broken at least until driver 364.19;
 472     // the cache does not always get regenerated when the source code changes,
 473     // e.g. if the path to the kernel sources remains the same
 474
 475     if (gpu_info->gpu_dev[mygpu].vendor_e == OCL_VENDOR_NVIDIA)
 476     {
 477         // Ignore return values, failing to set the variable does not mean
 478         // that something will go wrong later.
 479 #ifdef _MSC_VER
 480         _putenv("CUDA_CACHE_DISABLE=1");
 481 #else
 482         // Don't override, maybe a dev is testing.
 483         setenv("CUDA_CACHE_DISABLE", "1", 0);
 484 #endif
 485     }
 486
 487     return TRUE;
 488 }
 489
 490 //! This function is documented in the header file
 491 int get_gpu_device_id(const gmx_gpu_info_t  *,
 492                       const gmx_gpu_opt_t  *gpu_opt,
 493                       int                   idx)
 494 {
 495     assert(gpu_opt);
 496     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 497
 498     return gpu_opt->dev_use[idx];
 499 }
 500
 501 //! This function is documented in the header file
 502 char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
 503                               const gmx_gpu_opt_t  *gpu_opt,
 504                               int                   idx)
 505 {
 506     assert(gpu_info);
 507     assert(gpu_opt);
 508     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 509
 510     return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].device_name;
 511 }
 512
 513 //! This function is documented in the header file
 514 size_t sizeof_gpu_dev_info(void)
 515 {
 516     return sizeof(gmx_device_info_t);
 517 }
 518
 519 /*! \brief Prints the name of a kernel function pointer.
 520  *
 521  * \param[in]    kernel   OpenCL kernel
 522  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 523  */
 524 cl_int dbg_ocl_kernel_name(const cl_kernel kernel)
 525 {
 526     cl_int cl_error;
 527     char   kernel_name[256];
 528     cl_error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
 529                                sizeof(kernel_name), &kernel_name, NULL);
 530     if (cl_error)
 531     {
 532         printf("No kernel found!\n");
 533     }
 534     else
 535     {
 536         printf("%s\n", kernel_name);
 537     }
 538     return cl_error;
 539 }
 540
 541 /*! \brief Prints the name of a kernel function pointer.
 542  *
 543  * \param[in]    kernel   OpenCL kernel
 544  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 545  */
 546 cl_int dbg_ocl_kernel_name_address(void* kernel)
 547 {
 548     cl_int cl_error;
 549     char   kernel_name[256];
 550     cl_error = clGetKernelInfo((cl_kernel)kernel, CL_KERNEL_FUNCTION_NAME,
 551                                sizeof(kernel_name), &kernel_name, NULL);
 552     if (cl_error)
 553     {
 554         printf("No kernel found!\n");
 555     }
 556     else
 557     {
 558         printf("%s\n", kernel_name);
 559     }
 560     return cl_error;
 561 }
 562
 563 void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
 564                                   gmx_host_alloc_t **nb_alloc,
 565                                   gmx_host_free_t  **nb_free)
 566 {
 567     if (bUseGpuKernels)
 568     {
 569         *nb_alloc = &ocl_pmalloc;
 570         *nb_free  = &ocl_pfree;
 571     }
 572     else
 573     {
 574         *nb_alloc = NULL;
 575         *nb_free  = NULL;
 576     }
 577 }