Wrap all RTS functions exposed to AMPI programs in special macros
[charm.git] / src / arch / cuda / hybridAPI / hapi.h
blob8e8896a8141c1e50fc4e391a933707a2aa4fe382
1 #ifndef __HAPI_H_
2 #define __HAPI_H_
3 #include <cuda_runtime.h>
5 /* See hapi_functions.h for the majority of function declarations provided
6 * by the Hybrid API. */
8 /******************** DEPRECATED ********************/
9 // HAPI wrappers whose behavior is selected at compile time by the user-defined
10 // preprocessor macros HAPI_USE_CUDAMALLOCHOST and HAPI_MEMPOOL.
11 #ifdef HAPI_USE_CUDAMALLOCHOST
12 # ifdef HAPI_MEMPOOL
13 # define hapiHostMalloc hapiPoolMalloc
14 # define hapiHostFree hapiPoolFree
15 # else
16 # define hapiHostMalloc cudaMallocHost
17 # define hapiHostFree cudaFreeHost
18 # endif // HAPI_MEMPOOL
19 #else
20 # define hapiHostMalloc malloc
21 # define hapiHostFree free
22 #endif // HAPI_USE_CUDAMALLOCHOST
24 #ifdef __cplusplus
26 #include <cstring>
27 #include <cstdlib>
28 #include <vector>
30 /******************** DEPRECATED ********************/
31 // Contains information about a device buffer, which is used by
32 // the runtime to perform appropriate operations. Each hapiBufferInfo should
33 // be associated with a hapiWorkRequest.
typedef struct hapiBufferInfo {
  // ID of buffer in the runtime system's buffer table
  int id;

  // flags to indicate if the buffer should be transferred
  bool transfer_to_device;
  bool transfer_to_host;

  // flag to indicate if the device buffer memory should be freed
  // after execution of work request
  bool need_free;

  // pointer to host data buffer
  void* host_buffer;

  // size of buffer in bytes
  size_t size;

  // Creates a descriptor that refers to no host data: both transfer flags
  // and need_free are false, host_buffer is NULL and size is 0. Every member
  // is given a determinate value so the descriptor can be safely inspected
  // before the user fills it in.
  hapiBufferInfo(int _id = -1)
    : id(_id),
      transfer_to_device(false),
      transfer_to_host(false),
      need_free(false),
      host_buffer(NULL),
      size(0) {}

  // Creates a fully specified descriptor.
  //   _host_buffer        pointer to the host data buffer
  //   _size               size of the buffer in bytes
  //   _transfer_to_device whether the buffer should be transferred to the device
  //   _transfer_to_host   whether the buffer should be transferred to the host
  //   _need_free          whether the device buffer should be freed after
  //                       execution of the work request
  //   _id                 ID in the runtime system's buffer table (default -1)
  // The initializers are listed in member declaration order, which is the
  // order in which C++ actually performs them.
  hapiBufferInfo(void* _host_buffer, size_t _size, bool _transfer_to_device,
                 bool _transfer_to_host, bool _need_free, int _id = -1)
    : id(_id),
      transfer_to_device(_transfer_to_device),
      transfer_to_host(_transfer_to_host),
      need_free(_need_free),
      host_buffer(_host_buffer),
      size(_size) {}
} hapiBufferInfo;
62 /******************** DEPRECATED ********************/
63 // Data structure that ties a kernel, associated buffers, and other variables
64 // required by the runtime. The user gets a hapiWorkRequest from the runtime,
65 // fills it in, and enqueues it. The memory associated with it is managed
66 // by the runtime.
67 typedef struct hapiWorkRequest {
68 // parameters for kernel execution
69 dim3 grid_dim;
70 dim3 block_dim;
71 int shared_mem;
73 // contains information about buffers associated with the kernel
74 std::vector<hapiBufferInfo> buffers;
76 // Charm++ callback functions to be executed after certain stages of
77 // GPU execution
78 void* host_to_device_cb; // after host to device data transfer
79 void* kernel_cb; // after kernel execution
80 void* device_to_host_cb; // after device to host data transfer
82 #ifdef HAPI_TRACE
83 // short identifier used for tracing and logging
84 const char *trace_name;
85 #endif
87 // Pointer to host-side function that actually invokes the kernel.
88 // The user implements this function, using the given CUDA stream and
89 // device buffers (which are indexed by hapiBufferInfo->id).
90 // Could be set to NULL if no kernel needs to be executed.
91 void (*runKernel)(struct hapiWorkRequest* wr, cudaStream_t kernel_stream,
92 void** device_buffers);
94 // flag used for control by the system
95 int state;
97 // may be used to pass data to kernel calls
98 void* user_data;
100 // flag determining whether user data is freed on destruction
101 bool free_user_data;
103 // CUDA stream index provided by the user or assigned by GPUManager
104 cudaStream_t stream;
106 #ifdef HAPI_INSTRUMENT_WRS
107 double phase_start_time;
108 int chare_index;
109 char comp_type;
110 char comp_phase;
111 #endif
113 hapiWorkRequest() :
114 grid_dim(0), block_dim(0), shared_mem(0), host_to_device_cb(NULL),
115 kernel_cb(NULL), device_to_host_cb(NULL), runKernel(NULL), state(0),
116 user_data(NULL), free_user_data(false), stream(NULL)
118 #ifdef HAPI_TRACE
119 trace_name = "";
120 #endif
121 #ifdef HAPI_INSTRUMENT_WRS
122 chare_index = -1;
123 #endif
126 ~hapiWorkRequest() {
127 if (free_user_data)
128 std::free(user_data);
131 void setExecParams(dim3 _grid_dim, dim3 _block_dim, int _shared_mem = 0) {
132 grid_dim = _grid_dim;
133 block_dim = _block_dim;
134 shared_mem = _shared_mem;
137 void addBuffer(void *host_buffer, size_t size, bool transfer_to_device,
138 bool transfer_to_host, bool need_free, int id = -1) {
139 buffers.emplace_back(host_buffer, size, transfer_to_device, transfer_to_host,
140 need_free, id);
143 int getBufferID(int i) {
144 return buffers[i].id;
147 int getBufferCount() {
148 return buffers.size();
151 void setHostToDeviceCallback(void* cb) {
152 host_to_device_cb = cb;
155 void setKernelCallback(void* cb) {
156 kernel_cb = cb;
159 void setDeviceToHostCallback(void* cb) {
160 device_to_host_cb = cb;
163 void setCallback(void* cb) {
164 device_to_host_cb = cb;
167 #ifdef HAPI_TRACE
168 void setTraceName(const char* _trace_name) {
169 trace_name = _trace_name;
171 #endif
173 void setRunKernel(void (*_runKernel)(struct hapiWorkRequest*, cudaStream_t, void**)) {
174 runKernel = _runKernel;
177 void setStream(cudaStream_t _stream) {
178 stream = _stream;
181 cudaStream_t getStream() {
182 return stream;
185 void copyUserData(void* ptr, size_t size) {
186 // make a separate copy to prevent tampering with the original data
187 free_user_data = true;
188 user_data = std::malloc(size);
189 std::memcpy(user_data, ptr, size);
192 void setUserData(void* ptr, bool _free_user_data = false) {
193 free_user_data = _free_user_data;
194 user_data = ptr;
197 void* getUserData() {
198 return user_data;
201 } hapiWorkRequest;
203 #else /* defined __cplusplus */
205 /* In C mode, only declare the existence of C++ structs. */
206 typedef struct hapiBufferInfo hapiBufferInfo;
207 typedef struct hapiWorkRequest hapiWorkRequest;
209 #endif /* defined __cplusplus */
211 // Provides support for detecting errors with CUDA API calls.
// hapiCheck(code):
//  - HAPI_CHECK_OFF not defined: passes the value of `code`, its source text,
//    and the file and line of the call site to hapiErrorDie.
//  - HAPI_CHECK_OFF defined: expands to the expression itself, so the call
//    is still evaluated but its result is not checked.
// The macro argument is parenthesized in both forms so that it is always
// treated as a single expression, whatever operators it contains.
#ifndef HAPI_CHECK_OFF
#define hapiCheck(code) hapiErrorDie((code), #code, __FILE__, __LINE__)
#else
#define hapiCheck(code) (code)
#endif
218 #ifdef HAPI_INSTRUMENT_WRS
// Plain record of three time values and an integer counter.
// NOTE(review): presumably accumulated per-phase timings of work requests and
// the number of samples; confirm against the code that fills it in.
typedef struct hapiRequestTimeInfo {
  double transfer_time;
  double kernel_time;
  double cleanup_time;
  int n;

#ifdef __cplusplus
  // C++ only: start with every field zeroed.
  hapiRequestTimeInfo()
    : transfer_time(0.0),
      kernel_time(0.0),
      cleanup_time(0.0),
      n(0)
  {}
#endif /* defined __cplusplus */
} hapiRequestTimeInfo;
230 #endif /* defined HAPI_INSTRUMENT_WRS */
233 #ifndef AMPI_INTERNAL_SKIP_FUNCTIONS
235 #define AMPI_CUSTOM_FUNC(return_type, function_name, ...) \
236 extern return_type function_name(__VA_ARGS__);
238 #ifdef __cplusplus
239 extern "C" {
240 #endif
241 #include "hapi_functions.h"
242 #ifdef __cplusplus
244 #endif
246 #undef AMPI_CUSTOM_FUNC
248 #ifdef __cplusplus
250 // Provide a C++-only stub for this function's default parameter.
251 static inline void hapiAddCallback(cudaStream_t a, void* b) {
252 hapiAddCallback(a, b, NULL);
255 // Overloaded C++ wrappers for selecting whether to pool or not using a bool.
256 static inline cudaError_t hapiMallocHost(void** ptr, size_t size, bool pool) {
257 return pool ? hapiMallocHostPool(ptr, size) : hapiMallocHost(ptr, size);
259 static inline cudaError_t hapiFreeHost(void* ptr, bool pool) {
260 return pool ? hapiFreeHostPool(ptr) : hapiFreeHost(ptr);
263 #endif /* defined __cplusplus */
265 #endif /* !defined AMPI_INTERNAL_SKIP_FUNCTIONS */
267 #endif // __HAPI_H_