src/gromacs/hardware/device_management.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \libinternal \file
  37  *  \brief Declares functions to manage GPU resources.
  38  *
  39  *  This has several implementations: one for each supported GPU platform,
  40  *  and a stub implementation if the build does not support GPUs.
  41  *
  42  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  43  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  44  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  45  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  46  *  \author Szilárd Páll <pall.szilard@gmail.com>
  47  *  \author Artem Zhmurov <zhmurov@gmail.com>
  48  *
  49  * \inlibraryapi
  50  * \ingroup module_hardware
  51  */
  52 #ifndef GMX_HARDWARE_DEVICE_MANAGEMENT_H
  53 #define GMX_HARDWARE_DEVICE_MANAGEMENT_H
  54
  55 #include <memory>
  56 #include <string>
  57 #include <vector>
  58
  59 #include "gromacs/utility/basedefinitions.h"
  60 #include "gromacs/utility/iserializer.h"
  61
  62 struct DeviceInformation;
  63 enum class DeviceVendor : int;
  64
  65 /*! \brief Return whether GPUs can be detected.
  66  *
  67  * Returns true when this is a build of GROMACS configured to support
  68  * GPU usage, GPU detection is not disabled by the \c GMX_DISABLE_GPU_DETECTION
  69  * environment variable, and a valid device driver, ICD, and/or runtime was
  70  * detected. Does not throw.
  71  *
  72  * \param[out] errorMessage  When returning false on a build configured with
  73  *                           GPU support and a non-null pointer was passed,
  74  *                           the string contains a descriptive message about
  75  *                           why GPUs cannot be detected.
  76  */
  77 bool canPerformDeviceDetection(std::string* errorMessage);
  78
  79 /*! \brief Return whether GPU detection is enabled.
  80  *
  81  * Returns true when this is a build of GROMACS configured to support
  82  * GPU usage and GPU detection is not disabled by the \c GMX_DISABLE_GPU_DETECTION
  83  * environment variable.
  84  *
  85  * Does not throw.
  86  */
  87 bool isDeviceDetectionEnabled();
  88
  89 /*! \brief Return whether GPU detection is functioning correctly.
  90  *
  91  * Returns true when this is a build of GROMACS configured to support
  92  * GPU usage, and a valid device driver, ICD, and/or runtime was detected.
  93  *
  94  * This function is not intended to be called from build
  95  * configurations that do not support GPUs, and there will be no
  96  * descriptive message in that case.
  97  *
  98  * \param[out] errorMessage  When returning false on a build configured with
  99  *                           GPU support and a non-null pointer was passed,
 100  *                           the string contains a descriptive message about
 101  *                           why GPUs cannot be detected.
 102  *
 103  * Does not throw.
 104  */
 105 bool isDeviceDetectionFunctional(std::string* errorMessage);
 106
 107 /*! \brief Returns a DeviceVendor value corresponding to the input OpenCL vendor name.
 108  *  \param[in] vendorName  OpenCL vendor name to convert.
 109  *  \returns               DeviceVendor value for the input vendor name.
 110  */
 111 DeviceVendor getDeviceVendor(const char* vendorName);
 112
 113 /*! \brief Find all GPUs in the system.
 114  *
 115  *  Will detect every GPU supported by the device driver in use.
 116  *  Must only be called if \c canPerformDeviceDetection() has returned true.
 117  *  This routine also checks the compatibility of each device and fills the
 118  *  returned list with the required information on each device: ID, device
 119  *  properties, status.
 120  *
 121  *  Note that this function leaves the GPU runtime API error state clean;
 122  *  this is implemented ATM in the CUDA flavor. This invalidates any existing
 123  *  CUDA streams, allocated memory on GPU, etc.
 124  *
 125  *  \todo  Check if errors do propagate in OpenCL as they do in CUDA and
 126  *         whether there is a mechanism to "clear" them.
 127  *
 128  *  \return  Standard vector with the list of devices found
 129  *
 130  *  \throws InternalError if a GPU API returns an unexpected failure (because
 131  *          the call to canPerformDeviceDetection() should always prevent this occurring)
 132  */
 133 std::vector<std::unique_ptr<DeviceInformation>> findDevices();
 134
 135 /*! \brief Return a container of device-information handles that are compatible.
 136  *
 137  * This function filters the result of the detection for compatible
 138  * GPUs, based on the previously run compatibility tests.
 139  *
 140  * \param[in] deviceInfoList Information on the available devices.
 141  *
 142  * \return  Vector of references to the DeviceInformation of GPUs recorded as compatible
 143  */
 144 std::vector<std::reference_wrapper<DeviceInformation>>
 145 getCompatibleDevices(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList);
 146
 147 /*! \brief Return a container of the IDs of the compatible GPUs.
 148  *
 149  * This function filters the result of the detection for compatible
 150  * GPUs, based on the previously run compatibility tests.
 151  *
 152  * \param[in] deviceInfoList Information on the available devices.
 153  *
 154  * \return  Vector of compatible GPU IDs.
 155  */
 156 std::vector<int> getCompatibleDeviceIds(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList);
 157
 158 /*! \brief Return whether \p deviceId is found in \p deviceInfoList and is compatible
 159  *
 160  * This function consults the result of the detection for the device with
 161  * the given ID, based on the previously run compatibility tests.
 162  *
 163  * \param[in] deviceInfoList Information on the available devices.
 164  * \param[in] deviceId       The device ID to find in the list.
 165  *
 166  * \throws RangeError If \p deviceId does not match the id of any device in \c deviceInfoList
 167  *
 168  * \return  Whether \c deviceId is compatible.
 169  */
 170 bool deviceIdIsCompatible(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList,
 171                           int                                                    deviceId);
 172
 173 /*! \brief Set the active GPU.
 174  *
 175  * This makes the device described by the passed device information active. Essential in CUDA, where
 176  * the device buffers and kernel launches are not connected to the device context. In OpenCL, checks
 177  * the device vendor and makes vendor-specific performance adjustments.
 178  *
 179  * \param[in] deviceInfo Information on the device to be set active.
 180  *
 181  * Issues a fatal error for any critical errors that occur during
 182  * initialization.
 183  */
 184 void setActiveDevice(const DeviceInformation& deviceInfo);
 185
 186 /*! \brief Releases the GPU device used by the active context at the time of calling (CUDA only).
 187  *
 188  * If \c deviceInfo is nullptr, then it is understood that no device
 189  * was selected so no context is active to be freed. Otherwise, the
 190  * context is explicitly destroyed and therefore all data uploaded to
 191  * the GPU is lost. This must only be called when none of this data is
 192  * required anymore, because subsequent attempts to free memory
 193  * associated with the context will otherwise fail.
 194  *
 195  * Calls \c gmx_warning upon errors.
 196  *
 197  * \todo This should go through all the devices, not only the one currently active.
 198  *       Resetting only one device will not work, e.g. in CUDA tests.
 199  *
 200  * \param[in] deviceInfo Information on the device to be released.
 201  */
 202 void releaseDevice(DeviceInformation* deviceInfo);
 203
 204 /*! \brief Formats and returns a device information string for a given GPU.
 205  *
 206  * Given the information on a device, returns a formatted info string
 207  * for the respective GPU which includes ID, name,
 208  * compute capability, and detection status.
 209  *
 210  * \param[in] deviceInfo  Information on the device to be described.
 211  *
 212  * \returns A string describing the device.
 213  */
 214 std::string getDeviceInformationString(const DeviceInformation& deviceInfo);
 215
 216 /*! \brief Return a string describing how compatible the GPU with given \c deviceId is.
 217  *
 218  * \param[in] deviceInfoList Information on the available devices.
 219  * \param[in] deviceId       The index of the device to check.
 220  * \returns                  A string describing the compatibility status, useful for error messages.
 221  */
 222 std::string getDeviceCompatibilityDescription(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList,
 223                                               int deviceId);
 224
 225 /*! \brief Serialization of information on devices for MPI broadcasting.
 226  *
 227  * \param[in] deviceInfoList  The vector with the device information to serialize.
 228  * \param[in] serializer      Serializing object used to write the data.
 229  */
 230 void serializeDeviceInformations(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList,
 231                                  gmx::ISerializer*                                      serializer);
 232
 233 /*! \brief Deserialization of information on devices after MPI broadcasting.
 234  *
 235  * \param[in] serializer Serializing object used to read the data.
 236  *
 237  * \return Deserialized vector with the device information.
 238  */
 239 std::vector<std::unique_ptr<DeviceInformation>> deserializeDeviceInformations(gmx::ISerializer* serializer);
 240
 241 #endif // GMX_HARDWARE_DEVICE_MANAGEMENT_H