src/arch/cuda/hybridAPI/hapi.h

   1 #ifndef __HAPI_H_
   2 #define __HAPI_H_
   3 #include <cuda_runtime.h>
   4
   5 /* See hapi_functions.h for the majority of function declarations provided
   6  * by the Hybrid API. */
   7
   8 /******************** DEPRECATED ********************/
   9 // HAPI wrappers whose behavior is controlled by user defined variables,
  10 // which are HAPI_USE_CUDAMALLOCHOST and HAPI_MEMPOOL.
  11 #ifdef HAPI_USE_CUDAMALLOCHOST
  12 #  ifdef HAPI_MEMPOOL
  13 #    define hapiHostMalloc hapiPoolMalloc
  14 #    define hapiHostFree   hapiPoolFree
  15 #  else
  16 #    define hapiHostMalloc cudaMallocHost
  17 #    define hapiHostFree   cudaFreeHost
  18 #  endif // HAPI_MEMPOOL
  19 #else
  20 #  define hapiHostMalloc malloc
  21 #  define hapiHostFree   free
  22 #endif // HAPI_USE_CUDAMALLOCHOST
  23
  24 #ifdef __cplusplus
  25
  26 #include <cstring>
  27 #include <cstdlib>
  28 #include <vector>
  29
  30 /******************** DEPRECATED ********************/
  31 // Contains information about a device buffer, which is used by
  32 // the runtime to perform appropriate operations. Each hapiBufferInfo should
  33 // be associated with a hapiWorkRequest.
  34 typedef struct hapiBufferInfo {
  35   // ID of buffer in the runtime system's buffer table
  36   int id;
  37
  38   // flags to indicate if the buffer should be transferred
  39   bool transfer_to_device;
  40   bool transfer_to_host;
  41
  42   // flag to indicate if the device buffer memory should be freed
  43   // after execution of work request
  44   bool need_free;
  45
  46   // pointer to host data buffer
  47   void* host_buffer;
  48
  49   // size of buffer in bytes
  50   size_t size;
  51
  52   hapiBufferInfo(int _id = -1) : id(_id), transfer_to_device(false),
  53     transfer_to_host(false) {}
  54
  55   hapiBufferInfo(void* _host_buffer, size_t _size, bool _transfer_to_device,
  56       bool _transfer_to_host, bool _need_free, int _id = -1) :
  57     host_buffer(_host_buffer), size(_size), transfer_to_device(_transfer_to_device),
  58     transfer_to_host(_transfer_to_host), need_free(_need_free), id(_id) {}
  59
  60 } hapiBufferInfo;
  61
  62 /******************** DEPRECATED ********************/
  63 // Data structure that ties a kernel, associated buffers, and other variables
  64 // required by the runtime. The user gets a hapiWorkRequest from the runtime,
  65 // fills it in, and enqueues it. The memory associated with it is managed
  66 // by the runtime.
  67 typedef struct hapiWorkRequest {
  68   // parameters for kernel execution
  69   dim3 grid_dim;
  70   dim3 block_dim;
  71   int shared_mem;
  72
  73   // contains information about buffers associated with the kernel
  74   std::vector<hapiBufferInfo> buffers;
  75
  76   // Charm++ callback functions to be executed after certain stages of
  77   // GPU execution
  78   void* host_to_device_cb; // after host to device data transfer
  79   void* kernel_cb; // after kernel execution
  80   void* device_to_host_cb; // after device to host data transfer
  81
  82 #ifdef HAPI_TRACE
  83   // short identifier used for tracing and logging
  84   const char *trace_name;
  85 #endif
  86
  87   // Pointer to host-side function that actually invokes the kernel.
  88   // The user implements this function, using the given CUDA stream and
  89   // device buffers (which are indexed by hapiBufferInfo->id).
  90   // Could be set to NULL if no kernel needs to be executed.
  91   void (*runKernel)(struct hapiWorkRequest* wr, cudaStream_t kernel_stream,
  92                     void** device_buffers);
  93
  94   // flag used for control by the system
  95   int state;
  96
  97   // may be used to pass data to kernel calls
  98   void* user_data;
  99
 100   // flag determining whether user data is freed on destruction
 101   bool free_user_data;
 102
 103   // CUDA stream index provided by the user or assigned by GPUManager
 104   cudaStream_t stream;
 105
 106 #ifdef HAPI_INSTRUMENT_WRS
 107   double phase_start_time;
 108   int chare_index;
 109   char comp_type;
 110   char comp_phase;
 111 #endif
 112
 113   hapiWorkRequest() :
 114     grid_dim(0), block_dim(0), shared_mem(0), host_to_device_cb(NULL),
 115     kernel_cb(NULL), device_to_host_cb(NULL), runKernel(NULL), state(0),
 116     user_data(NULL), free_user_data(false), stream(NULL)
 117   {
 118 #ifdef HAPI_TRACE
 119     trace_name = "";
 120 #endif
 121 #ifdef HAPI_INSTRUMENT_WRS
 122     chare_index = -1;
 123 #endif
 124   }
 125
 126   ~hapiWorkRequest() {
 127     if (free_user_data)
 128       std::free(user_data);
 129   }
 130
 131   void setExecParams(dim3 _grid_dim, dim3 _block_dim, int _shared_mem = 0) {
 132     grid_dim = _grid_dim;
 133     block_dim = _block_dim;
 134     shared_mem = _shared_mem;
 135   }
 136
 137   void addBuffer(void *host_buffer, size_t size, bool transfer_to_device,
 138                  bool transfer_to_host, bool need_free, int id = -1) {
 139     buffers.emplace_back(host_buffer, size, transfer_to_device, transfer_to_host,
 140                          need_free, id);
 141   }
 142
 143   int getBufferID(int i) {
 144     return buffers[i].id;
 145   }
 146
 147   int getBufferCount() {
 148     return buffers.size();
 149   }
 150
 151   void setHostToDeviceCallback(void* cb) {
 152     host_to_device_cb = cb;
 153   }
 154
 155   void setKernelCallback(void* cb) {
 156     kernel_cb = cb;
 157   }
 158
 159   void setDeviceToHostCallback(void* cb) {
 160     device_to_host_cb = cb;
 161   }
 162
 163   void setCallback(void* cb) {
 164     device_to_host_cb = cb;
 165   }
 166
 167 #ifdef HAPI_TRACE
 168   void setTraceName(const char* _trace_name) {
 169     trace_name = _trace_name;
 170   }
 171 #endif
 172
 173   void setRunKernel(void (*_runKernel)(struct hapiWorkRequest*, cudaStream_t, void**)) {
 174     runKernel = _runKernel;
 175   }
 176
 177   void setStream(cudaStream_t _stream) {
 178     stream = _stream;
 179   }
 180
 181   cudaStream_t getStream() {
 182     return stream;
 183   }
 184
 185   void copyUserData(void* ptr, size_t size) {
 186     // make a separate copy to prevent tampering with the original data
 187     free_user_data = true;
 188     user_data = std::malloc(size);
 189     std::memcpy(user_data, ptr, size);
 190   }
 191
 192   void setUserData(void* ptr, bool _free_user_data = false) {
 193     free_user_data = _free_user_data;
 194     user_data = ptr;
 195   }
 196
 197   void* getUserData() {
 198     return user_data;
 199   }
 200
 201 } hapiWorkRequest;
 202
 203 #else /* defined __cplusplus */
 204
 205 /* In C mode, only declare the existence of C++ structs. */
 206 typedef struct hapiBufferInfo hapiBufferInfo;
 207 typedef struct hapiWorkRequest hapiWorkRequest;
 208
 209 #endif /* defined __cplusplus */
 210
 211 // Provides support for detecting errors with CUDA API calls.
 212 #ifndef HAPI_CHECK_OFF
 213 #define hapiCheck(code) hapiErrorDie(code, #code, __FILE__, __LINE__)
 214 #else
 215 #define hapiCheck(code) code
 216 #endif
 217
 218 #ifdef HAPI_INSTRUMENT_WRS
 219 typedef struct hapiRequestTimeInfo {
 220   double transfer_time;
 221   double kernel_time;
 222   double cleanup_time;
 223   int n;
 224
 225 #ifdef __cplusplus
 226   hapiRequestTimeInfo() : transfer_time(0.0), kernel_time(0.0), cleanup_time(0.0),
 227     n(0) {}
 228 #endif /* defined __cplusplus */
 229 } hapiRequestTimeInfo;
 230 #endif /* defined HAPI_INSTRUMENT_WRS */
 231
 232
 233 #ifndef AMPI_INTERNAL_SKIP_FUNCTIONS
 234
 235 #define AMPI_CUSTOM_FUNC(return_type, function_name, ...) \
 236 extern return_type function_name(__VA_ARGS__);
 237
 238 #ifdef __cplusplus
 239 extern "C" {
 240 #endif
 241 #include "hapi_functions.h"
 242 #ifdef __cplusplus
 243 }
 244 #endif
 245
 246 #undef AMPI_CUSTOM_FUNC
 247
 248 #ifdef __cplusplus
 249
 250 // Provide a C++-only stub for this function's default parameter.
 251 static inline void hapiAddCallback(cudaStream_t a, void* b) {
 252   hapiAddCallback(a, b, NULL);
 253 }
 254
 255 // Overloaded C++ wrappers for selecting whether to pool or not using a bool.
 256 static inline cudaError_t hapiMallocHost(void** ptr, size_t size, bool pool) {
 257   return pool ? hapiMallocHostPool(ptr, size) : hapiMallocHost(ptr, size);
 258 }
 259 static inline cudaError_t hapiFreeHost(void* ptr, bool pool) {
 260   return pool ? hapiFreeHostPool(ptr) : hapiFreeHost(ptr);
 261 }
 262
 263 #endif /* defined __cplusplus */
 264
 265 #endif /* !defined AMPI_INTERNAL_SKIP_FUNCTIONS */
 266
 267 #endif // __HAPI_H_