src/gromacs/gmxlib/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  */
  42
  43 #include "gmxpre.h"
  44
  45 #include <assert.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #if __APPLE__
  50 #    include <sys/sysctl.h>
  51 #endif
  52
  53 #include <memory.h>
  54
  55 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
  56 #include "gromacs/gmxlib/gpu_utils/ocl_compiler.h"
  57 #include "gromacs/gmxlib/ocl_tools/oclutils.h"
  58 #include "gromacs/legacyheaders/types/enums.h"
  59 #include "gromacs/legacyheaders/types/hw_info.h"
  60 #include "gromacs/utility/cstringutil.h"
  61 #include "gromacs/utility/fatalerror.h"
  62 #include "gromacs/utility/smalloc.h"
  63
  64 /*! \brief Helper macro for error handling */
  65 #define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
  66         cl_int opencl_ret = func; \
  67         if (CL_SUCCESS != opencl_ret) \
  68         { \
  69             sprintf(err_str, "OpenCL error %d", opencl_ret); \
  70             retval = -1; \
  71         } \
  72         else{ \
  73             retval = 0; } \
  74 }
  75
  76
  77 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
  78  *
  79  * \param[in] stat  GPU status.
  80  * \returns         true if the provided status is egpuCompatible, otherwise false.
  81  */
  82 static bool is_compatible_gpu(int stat)
  83 {
  84     return (stat == egpuCompatible);
  85 }
  86
  87 /*! \brief Return true if executing on OS X earlier than 10.10.4
  88  *
  89  * Uses the BSD sysctl() interfaces to extract the kernel version.
  90  *
  91  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  92  *         otherwise false.
  93  */
  94 static bool
  95 runningOnWorkingOSXVersionForAmd()
  96 {
  97 #ifdef __APPLE__
  98     int    mib[2];
  99     char   kernelVersion[256];
 100     size_t len = sizeof(kernelVersion);
 101
 102     mib[0] = CTL_KERN;
 103     mib[1] = KERN_OSRELEASE;
 104
 105     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
 106
 107     int major = strtod(kernelVersion, NULL);
 108     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
 109
 110     // Kernel 14.4 corresponds to OS X 10.10.4
 111     return (major > 14 || (major == 14 && minor >= 4));
 112 #else
 113     return false;
 114 #endif
 115 }
 116
 117 /*! \brief Returns true if the gpu characterized by the device properties is
 118  *  supported by the native gpu acceleration.
 119  * \returns             true if the GPU properties passed indicate a compatible
 120  *                      GPU, otherwise false.
 121  */
 122 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 123 {
 124     /* Only AMD and NVIDIA GPUs are supported for now */
 125     switch (ocl_gpu_device->vendor_e)
 126     {
 127         case OCL_VENDOR_NVIDIA:
 128             return egpuCompatible;
 129         case OCL_VENDOR_AMD:
 130             return runningOnWorkingOSXVersionForAmd() ? egpuCompatible : egpuIncompatible;
 131         default:
 132             return egpuIncompatible;
 133     }
 134 }
 135
 136
 137 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 138  *
 139  *  \param[in] vendor_name String with OpenCL vendor name.
 140  *  \returns               ocl_vendor_id_t value for the input vendor_name
 141  */
 142 ocl_vendor_id_t get_vendor_id(char *vendor_name)
 143 {
 144     if (vendor_name)
 145     {
 146         if (strstr(vendor_name, "NVIDIA"))
 147         {
 148             return OCL_VENDOR_NVIDIA;
 149         }
 150         else
 151         if (strstr(vendor_name, "AMD") ||
 152             strstr(vendor_name, "Advanced Micro Devices"))
 153         {
 154             return OCL_VENDOR_AMD;
 155         }
 156         else
 157         if (strstr(vendor_name, "Intel"))
 158         {
 159             return OCL_VENDOR_INTEL;
 160         }
 161     }
 162     return OCL_VENDOR_UNKNOWN;
 163 }
 164
 165
 166 //! This function is documented in the header file
 167 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 168 {
 169     int             retval;
 170     cl_uint         ocl_platform_count;
 171     cl_platform_id *ocl_platform_ids;
 172     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 173
 174     retval           = 0;
 175     ocl_platform_ids = NULL;
 176
 177     if (getenv("GMX_OCL_FORCE_CPU") != NULL)
 178     {
 179         req_dev_type = CL_DEVICE_TYPE_CPU;
 180     }
 181
 182     while (1)
 183     {
 184         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL, &ocl_platform_count), err_str, retval)
 185         if (0 != retval)
 186         {
 187             break;
 188         }
 189
 190         if (1 > ocl_platform_count)
 191         {
 192             break;
 193         }
 194
 195         snew(ocl_platform_ids, ocl_platform_count);
 196
 197         CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL), err_str, retval)
 198         if (0 != retval)
 199         {
 200             break;
 201         }
 202
 203         for (unsigned int i = 0; i < ocl_platform_count; i++)
 204         {
 205             cl_uint ocl_device_count;
 206
 207             /* If requesting req_dev_type devices fails, just go to the next platform */
 208             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
 209             {
 210                 continue;
 211             }
 212
 213             if (1 <= ocl_device_count)
 214             {
 215                 gpu_info->n_dev += ocl_device_count;
 216             }
 217         }
 218
 219         if (1 > gpu_info->n_dev)
 220         {
 221             break;
 222         }
 223
 224         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 225
 226         {
 227             int           device_index;
 228             cl_device_id *ocl_device_ids;
 229
 230             snew(ocl_device_ids, gpu_info->n_dev);
 231             device_index = 0;
 232
 233             for (unsigned int i = 0; i < ocl_platform_count; i++)
 234             {
 235                 cl_uint ocl_device_count;
 236
 237                 /* If requesting req_dev_type devices fails, just go to the next platform */
 238                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 239                 {
 240                     continue;
 241                 }
 242
 243                 if (1 > ocl_device_count)
 244                 {
 245                     break;
 246                 }
 247
 248                 for (unsigned int j = 0; j < ocl_device_count; j++)
 249                 {
 250                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 251                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 252
 253                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 254                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
 255
 256                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 257                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
 258
 259                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 260                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
 261
 262                     gpu_info->gpu_dev[device_index].compute_units = 0;
 263                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
 264
 265                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 266                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
 267
 268                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 269
 270                     gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
 271
 272                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 273                     {
 274                         gpu_info->n_dev_compatible++;
 275                     }
 276
 277                     device_index++;
 278                 }
 279             }
 280
 281             gpu_info->n_dev = device_index;
 282
 283             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 284             // TODO: Sort devices based on performance.
 285             if (0 < gpu_info->n_dev)
 286             {
 287                 int last = -1;
 288                 for (int i = 0; i < gpu_info->n_dev; i++)
 289                 {
 290                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 291                     {
 292                         last++;
 293
 294                         if (last < i)
 295                         {
 296                             gmx_device_info_t ocl_gpu_info;
 297                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 298                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 299                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 300                         }
 301                     }
 302                 }
 303
 304                 /* if more than 1 device left to be sorted */
 305                 if ((gpu_info->n_dev - 1 - last) > 1)
 306                 {
 307                     for (int i = 0; i < gpu_info->n_dev; i++)
 308                     {
 309                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 310                         {
 311                             last++;
 312
 313                             if (last < i)
 314                             {
 315                                 gmx_device_info_t ocl_gpu_info;
 316                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 317                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 318                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 319                             }
 320                         }
 321                     }
 322                 }
 323             }
 324
 325             sfree(ocl_device_ids);
 326         }
 327
 328         break;
 329     }
 330
 331     sfree(ocl_platform_ids);
 332
 333     return retval;
 334 }
 335
 336 //! This function is documented in the header file
 337 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 338 {
 339     if (gpu_info)
 340     {
 341         for (int i = 0; i < gpu_info->n_dev; i++)
 342         {
 343             cl_int gmx_unused cl_error;
 344
 345             if (gpu_info->gpu_dev[i].context)
 346             {
 347                 cl_error                     = clReleaseContext(gpu_info->gpu_dev[i].context);
 348                 gpu_info->gpu_dev[i].context = NULL;
 349                 assert(CL_SUCCESS == cl_error);
 350             }
 351
 352             if (gpu_info->gpu_dev[i].program)
 353             {
 354                 cl_error                     = clReleaseProgram(gpu_info->gpu_dev[i].program);
 355                 gpu_info->gpu_dev[i].program = NULL;
 356                 assert(CL_SUCCESS == cl_error);
 357             }
 358         }
 359
 360         sfree(gpu_info->gpu_dev);
 361     }
 362 }
 363
 364 //! This function is documented in the header file
 365 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
 366                           gmx_gpu_opt_t        *gpu_opt)
 367 {
 368     int  i, ncompat;
 369     int *compat;
 370
 371     assert(gpu_info);
 372     /* gpu_dev/n_dev have to be either NULL/0 or not (NULL/0) */
 373     assert((gpu_info->n_dev != 0 ? 0 : 1) ^ (gpu_info->gpu_dev == NULL ? 0 : 1));
 374
 375     snew(compat, gpu_info->n_dev);
 376     ncompat = 0;
 377     for (i = 0; i < gpu_info->n_dev; i++)
 378     {
 379         if (is_compatible_gpu(gpu_info->gpu_dev[i].stat))
 380         {
 381             ncompat++;
 382             compat[ncompat - 1] = i;
 383         }
 384     }
 385
 386     gpu_opt->n_dev_compatible = ncompat;
 387     snew(gpu_opt->dev_compatible, ncompat);
 388     memcpy(gpu_opt->dev_compatible, compat, ncompat*sizeof(*compat));
 389     sfree(compat);
 390 }
 391
 392 //! This function is documented in the header file
 393 gmx_bool check_selected_gpus(int                  *checkres,
 394                              const gmx_gpu_info_t *gpu_info,
 395                              gmx_gpu_opt_t        *gpu_opt)
 396 {
 397     int  i, id;
 398     bool bAllOk;
 399
 400     assert(checkres);
 401     assert(gpu_info);
 402     assert(gpu_opt->n_dev_use >= 0);
 403
 404     if (gpu_opt->n_dev_use == 0)
 405     {
 406         return TRUE;
 407     }
 408
 409     assert(gpu_opt->dev_use);
 410
 411     /* we will assume that all GPUs requested are valid IDs,
 412        otherwise we'll bail anyways */
 413
 414     bAllOk = true;
 415     for (i = 0; i < gpu_opt->n_dev_use; i++)
 416     {
 417         id = gpu_opt->dev_use[i];
 418
 419         /* devices are stored in increasing order of IDs in gpu_dev */
 420         gpu_opt->dev_use[i] = id;
 421
 422         checkres[i] = (id >= gpu_info->n_dev) ?
 423             egpuNonexistent : gpu_info->gpu_dev[id].stat;
 424
 425         bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
 426     }
 427
 428     return bAllOk;
 429 }
 430
 431 //! This function is documented in the header file
 432 void get_gpu_device_info_string(char gmx_unused *s, const gmx_gpu_info_t gmx_unused *gpu_info, int gmx_unused index)
 433 {
 434     assert(s);
 435     assert(gpu_info);
 436
 437     if (index < 0 && index >= gpu_info->n_dev)
 438     {
 439         return;
 440     }
 441
 442     gmx_device_info_t  *dinfo = &gpu_info->gpu_dev[index];
 443
 444     bool                bGpuExists =
 445         dinfo->stat == egpuCompatible ||
 446         dinfo->stat == egpuIncompatible;
 447
 448     if (!bGpuExists)
 449     {
 450         sprintf(s, "#%d: %s, stat: %s",
 451                 index, "N/A",
 452                 gpu_detect_res_str[dinfo->stat]);
 453     }
 454     else
 455     {
 456         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 457                 index, dinfo->device_name, dinfo->device_vendor,
 458                 dinfo->device_version,
 459                 gpu_detect_res_str[dinfo->stat]);
 460     }
 461 }
 462
 463 //! This function is documented in the header file
 464 gmx_bool init_gpu(FILE gmx_unused                 *fplog,
 465                   int                              mygpu,
 466                   char                            *result_str,
 467                   const gmx_gpu_info_t gmx_unused *gpu_info,
 468                   const gmx_gpu_opt_t             *gpu_opt
 469                   )
 470 {
 471     assert(result_str);
 472
 473     result_str[0] = 0;
 474
 475     if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
 476     {
 477         char        sbuf[STRLEN];
 478         sprintf(sbuf, "Trying to initialize an inexistent GPU: "
 479                 "there are %d %s-selected GPU(s), but #%d was requested.",
 480                 gpu_opt->n_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
 481         gmx_incons(sbuf);
 482     }
 483
 484     return TRUE;
 485 }
 486
 487 //! This function is documented in the header file
 488 int get_gpu_device_id(const gmx_gpu_info_t  *,
 489                       const gmx_gpu_opt_t  *gpu_opt,
 490                       int                   idx)
 491 {
 492     assert(gpu_opt);
 493     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 494
 495     return gpu_opt->dev_use[idx];
 496 }
 497
 498 //! This function is documented in the header file
 499 char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
 500                               const gmx_gpu_opt_t  *gpu_opt,
 501                               int                   idx)
 502 {
 503     assert(gpu_info);
 504     assert(gpu_opt);
 505     assert(idx >= 0 && idx < gpu_opt->n_dev_use);
 506
 507     return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].device_name;
 508 }
 509
 510 //! This function is documented in the header file
 511 size_t sizeof_gpu_dev_info(void)
 512 {
 513     return sizeof(gmx_device_info_t);
 514 }
 515
 516 /*! \brief Prints the name of a kernel function pointer.
 517  *
 518  * \param[in]    kernel   OpenCL kernel
 519  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 520  */
 521 cl_int dbg_ocl_kernel_name(const cl_kernel kernel)
 522 {
 523     cl_int cl_error;
 524     char   kernel_name[256];
 525     cl_error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
 526                                sizeof(kernel_name), &kernel_name, NULL);
 527     if (cl_error)
 528     {
 529         printf("No kernel found!\n");
 530     }
 531     else
 532     {
 533         printf("%s\n", kernel_name);
 534     }
 535     return cl_error;
 536 }
 537
 538 /*! \brief Prints the name of a kernel function pointer.
 539  *
 540  * \param[in]    kernel   OpenCL kernel
 541  * \returns               CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
 542  */
 543 cl_int dbg_ocl_kernel_name_address(void* kernel)
 544 {
 545     cl_int cl_error;
 546     char   kernel_name[256];
 547     cl_error = clGetKernelInfo((cl_kernel)kernel, CL_KERNEL_FUNCTION_NAME,
 548                                sizeof(kernel_name), &kernel_name, NULL);
 549     if (cl_error)
 550     {
 551         printf("No kernel found!\n");
 552     }
 553     else
 554     {
 555         printf("%s\n", kernel_name);
 556     }
 557     return cl_error;
 558 }
 559
 560 void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
 561                                   gmx_host_alloc_t **nb_alloc,
 562                                   gmx_host_free_t  **nb_free)
 563 {
 564     if (bUseGpuKernels)
 565     {
 566         *nb_alloc = &ocl_pmalloc;
 567         *nb_free  = &ocl_pfree;
 568     }
 569     else
 570     {
 571         *nb_alloc = NULL;
 572         *nb_free  = NULL;
 573     }
 574 }