3 #include <cuda_runtime.h>
5 /* See hapi_functions.h for the majority of function declarations provided
6 * by the Hybrid API. */
8 /******************** DEPRECATED ********************/
9 // HAPI wrappers whose behavior is controlled by two user-defined preprocessor
10 // macros: HAPI_USE_CUDAMALLOCHOST and HAPI_MEMPOOL.
// Selects what the deprecated names hapiHostMalloc / hapiHostFree expand to.
// NOTE(review): the inner `# ifdef HAPI_MEMPOOL` / `# else` and the outer `#else`
// directives are not visible in this view; the branch descriptions below are
// inferred from the trailing comments on the `# endif` / `#endif` lines -- confirm.
// NOTE(review): cudaMallocHost takes (void**, size_t) and returns cudaError_t,
// whereas malloc takes (size_t) and returns the pointer, so call sites written
// against one expansion may not compile against another -- verify callers.
11 #ifdef HAPI_USE_CUDAMALLOCHOST
// Presumably the HAPI_MEMPOOL branch: route to the pool allocator entry points.
13 # define hapiHostMalloc hapiPoolMalloc
14 # define hapiHostFree hapiPoolFree
// Presumably the non-pool branch: map directly onto the CUDA runtime calls.
16 # define hapiHostMalloc cudaMallocHost
17 # define hapiHostFree cudaFreeHost
18 # endif // HAPI_MEMPOOL
// Presumably the fallback when HAPI_USE_CUDAMALLOCHOST is not defined: C heap.
20 # define hapiHostMalloc malloc
21 # define hapiHostFree free
22 #endif // HAPI_USE_CUDAMALLOCHOST
30 /******************** DEPRECATED ********************/
31 // Contains information about a device buffer, which is used by
32 // the runtime to perform appropriate operations. Each hapiBufferInfo should
33 // be associated with a hapiWorkRequest.
// Per-buffer record attached to a work request.
// NOTE(review): the declarations of id, need_free, host_buffer and size are not
// visible in this view; only their descriptive comments and the constructors
// that initialize them are. Types are presumably those of the matching
// constructor parameters (int, bool, void*, size_t) -- confirm.
34 typedef struct hapiBufferInfo
{
35 // ID of buffer in the runtime system's buffer table
38 // flags to indicate if the buffer should be transferred
39 bool transfer_to_device
;
40 bool transfer_to_host
;
42 // flag to indicate if the device buffer memory should be freed
43 // after execution of work request
46 // pointer to host data buffer
49 // size of buffer in bytes
// ID-only constructor: stores _id (default -1) and clears both transfer flags.
// NOTE(review): host_buffer, size and need_free do not appear in this
// initializer list, so they are left uninitialized here unless their
// declarations (not visible) carry default member initializers -- confirm.
52 hapiBufferInfo(int _id
= -1) : id(_id
), transfer_to_device(false),
53 transfer_to_host(false) {}
// Full constructor: copies each argument into the member of the same name;
// _id again defaults to -1.
// NOTE(review): id is written last in this list but is described first in the
// struct; C++ initializes members in declaration order, not list order. No
// initializer here depends on another, so the result is the same either way.
55 hapiBufferInfo(void* _host_buffer
, size_t _size
, bool _transfer_to_device
,
56 bool _transfer_to_host
, bool _need_free
, int _id
= -1) :
57 host_buffer(_host_buffer
), size(_size
), transfer_to_device(_transfer_to_device
),
58 transfer_to_host(_transfer_to_host
), need_free(_need_free
), id(_id
) {}
62 /******************** DEPRECATED ********************/
63 // Data structure that ties a kernel, associated buffers, and other variables
64 // required by the runtime. The user gets a hapiWorkRequest from the runtime,
65 // fills it in, and enqueues it. The memory associated with it is managed
// Work-request record: launch parameters, the list of associated buffers,
// stage callbacks, a kernel-launch function pointer and optional user data.
// NOTE(review): several member declarations (grid_dim, block_dim, shared_mem,
// state, user_data, free_user_data, stream), the constructor/destructor
// signatures and a number of method bodies are not visible in this view; the
// notes below describe only the statements that are.
67 typedef struct hapiWorkRequest
{
68 // parameters for kernel execution
73 // contains information about buffers associated with the kernel
74 std::vector
<hapiBufferInfo
> buffers
;
76 // Charm++ callback functions to be executed after certain stages of
78 void* host_to_device_cb
; // after host to device data transfer
79 void* kernel_cb
; // after kernel execution
80 void* device_to_host_cb
; // after device to host data transfer
83 // short identifier used for tracing and logging
84 const char *trace_name
;
87 // Pointer to host-side function that actually invokes the kernel.
88 // The user implements this function, using the given CUDA stream and
89 // device buffers (which are indexed by hapiBufferInfo->id).
90 // Could be set to NULL if no kernel needs to be executed.
91 void (*runKernel
)(struct hapiWorkRequest
* wr
, cudaStream_t kernel_stream
,
92 void** device_buffers
);
94 // flag used for control by the system
97 // may be used to pass data to kernel calls
100 // flag determining whether user data is freed on destruction
103 // CUDA stream index provided by the user or assigned by GPUManager
// Instrumentation-only members; only phase_start_time is visible in this view.
106 #ifdef HAPI_INSTRUMENT_WRS
107 double phase_start_time
;
// Constructor initializer list (the signature line is not visible): sets
// grid_dim, block_dim, shared_mem and state to 0, nulls the three callbacks,
// runKernel, user_data and stream, and clears free_user_data.
// NOTE(review): trace_name does not appear in the visible part of this list --
// confirm it is assigned before anything reads it.
114 grid_dim(0), block_dim(0), shared_mem(0), host_to_device_cb(NULL
),
115 kernel_cb(NULL
), device_to_host_cb(NULL
), runKernel(NULL
), state(0),
116 user_data(NULL
), free_user_data(false), stream(NULL
)
121 #ifdef HAPI_INSTRUMENT_WRS
// Releases user_data with std::free. The enclosing function and any guarding
// condition are not visible; the free_user_data comment above suggests this
// runs on destruction and is conditional on that flag -- confirm.
128 std::free(user_data
);
// Stores the launch configuration; _shared_mem defaults to 0.
131 void setExecParams(dim3 _grid_dim
, dim3 _block_dim
, int _shared_mem
= 0) {
132 grid_dim
= _grid_dim
;
133 block_dim
= _block_dim
;
134 shared_mem
= _shared_mem
;
// Appends a hapiBufferInfo constructed in place at the end of buffers.
// The tail of the emplace_back argument list is cut off in this view;
// presumably need_free and id follow -- confirm.
137 void addBuffer(void *host_buffer
, size_t size
, bool transfer_to_device
,
138 bool transfer_to_host
, bool need_free
, int id
= -1) {
139 buffers
.emplace_back(host_buffer
, size
, transfer_to_device
, transfer_to_host
,
// Returns the id of the i-th entry of buffers. std::vector::operator[] does
// no bounds checking, so i must already be a valid index.
143 int getBufferID(int i
) {
144 return buffers
[i
].id
;
// Returns the number of entries in buffers; the size_t from size() is
// narrowed to int by the return type.
147 int getBufferCount() {
148 return buffers
.size();
// Stores cb as the callback for the host-to-device stage.
151 void setHostToDeviceCallback(void* cb
) {
152 host_to_device_cb
= cb
;
// Body not visible in this view; presumably stores cb in kernel_cb -- confirm.
155 void setKernelCallback(void* cb
) {
// Stores cb as the callback for the device-to-host stage.
159 void setDeviceToHostCallback(void* cb
) {
160 device_to_host_cb
= cb
;
// Writes the same member as setDeviceToHostCallback (device_to_host_cb).
163 void setCallback(void* cb
) {
164 device_to_host_cb
= cb
;
// Stores the pointer only; the characters are not copied, so the caller's
// string has to stay valid for as long as trace_name may be read.
168 void setTraceName(const char* _trace_name
) {
169 trace_name
= _trace_name
;
// Stores the host-side launch function described at the runKernel member.
173 void setRunKernel(void (*_runKernel
)(struct hapiWorkRequest
*, cudaStream_t
, void**)) {
174 runKernel
= _runKernel
;
// Body not visible in this view; presumably stores _stream in stream -- confirm.
177 void setStream(cudaStream_t _stream
) {
// Body not visible in this view; presumably returns stream -- confirm.
181 cudaStream_t
getStream() {
// Sets free_user_data, allocates size bytes with std::malloc and copies size
// bytes from ptr into the new allocation.
// NOTE(review): the malloc result is handed to memcpy without a null check,
// and whatever user_data previously pointed to is overwritten without being
// released in the visible statements -- confirm neither can occur in practice.
185 void copyUserData(void* ptr
, size_t size
) {
186 // make a separate copy to prevent tampering with the original data
187 free_user_data
= true;
188 user_data
= std::malloc(size
);
189 std::memcpy(user_data
, ptr
, size
);
// Records the caller's ownership choice (default: do not free). The statement
// that stores ptr itself is not visible in this view.
192 void setUserData(void* ptr
, bool _free_user_data
= false) {
193 free_user_data
= _free_user_data
;
// Body not visible in this view; presumably returns user_data -- confirm.
197 void* getUserData() {
203 #else /* defined __cplusplus */
205 /* In C mode, only declare the existence of C++ structs. */
/* Forward declarations only: no members are declared on this side of the
 * __cplusplus guard, so in C these are incomplete types that can be referred
 * to through pointers but not instantiated or dereferenced. */
206 typedef struct hapiBufferInfo hapiBufferInfo
;
207 typedef struct hapiWorkRequest hapiWorkRequest
;
209 #endif /* defined __cplusplus */
211 // Provides support for detecting errors with CUDA API calls.
// hapiCheck(code) wraps a call so its result can be checked.
// NOTE(review): the `#else` / `#endif` separating the two definitions below
// are not visible in this view.
212 #ifndef HAPI_CHECK_OFF
// Checking form: hands the expression's value, its stringified source text
// (#code) and the call site's file and line to hapiErrorDie.
213 #define hapiCheck(code) hapiErrorDie(code, #code, __FILE__, __LINE__)
// Pass-through form: expands to the bare expression, with no check.
215 #define hapiCheck(code) code
// Timing record, compiled only when HAPI_INSTRUMENT_WRS is defined.
// NOTE(review): only transfer_time is declared in this view; kernel_time and
// cleanup_time are referenced by the constructor but their declarations, any
// further members, and the rest of the initializer list are not visible.
218 #ifdef HAPI_INSTRUMENT_WRS
219 typedef struct hapiRequestTimeInfo
{
220 double transfer_time
;
// Constructor: starts transfer_time, kernel_time and cleanup_time at 0.0. The
// `#endif` that follows indicates it sits in a __cplusplus-guarded region
// whose opening directive is not visible here.
226 hapiRequestTimeInfo() : transfer_time(0.0), kernel_time(0.0), cleanup_time(0.0),
228 #endif /* defined __cplusplus */
229 } hapiRequestTimeInfo
;
230 #endif /* defined HAPI_INSTRUMENT_WRS */
233 #ifndef AMPI_INTERNAL_SKIP_FUNCTIONS
// Temporarily define AMPI_CUSTOM_FUNC so that each use expands to an `extern`
// function declaration built from the given return type, name and parameters.
235 #define AMPI_CUSTOM_FUNC(return_type, function_name, ...) \
236 extern return_type function_name(__VA_ARGS__);
// Presumably hapi_functions.h lists the API as AMPI_CUSTOM_FUNC(...) entries,
// which the definition above turns into declarations -- confirm in that file.
241 #include "hapi_functions.h"
// Remove the helper macro so it does not leak past this header.
246 #undef AMPI_CUSTOM_FUNC
250 // Provide a C++-only stub for this function's default parameter.
// Two-argument convenience overload: forwards a and b to the three-argument
// hapiAddCallback (declared elsewhere), passing NULL as the third argument.
// The call has a different argument count, so it does not recurse into itself.
251 static inline void hapiAddCallback(cudaStream_t a
, void* b
) {
252 hapiAddCallback(a
, b
, NULL
);
255 // Overloaded C++ wrappers for selecting whether to pool or not using a bool.
// Three-argument overload that picks the allocator at run time:
//   pool == true  -> hapiMallocHostPool(ptr, size)
//   pool == false -> the two-argument hapiMallocHost(ptr, size)
// Both callees are declared elsewhere. The chosen callee's cudaError_t is
// returned unchanged.
256 static inline cudaError_t
hapiMallocHost(void** ptr
, size_t size
, bool pool
) {
257 return pool
? hapiMallocHostPool(ptr
, size
) : hapiMallocHost(ptr
, size
);
// Two-argument overload that picks the deallocator at run time:
//   pool == true  -> hapiFreeHostPool(ptr)
//   pool == false -> the one-argument hapiFreeHost(ptr)
// Both callees are declared elsewhere. The chosen callee's cudaError_t is
// returned unchanged.
// NOTE(review): presumably pool must match the value used when ptr was
// allocated -- nothing in the visible code enforces this; confirm with callers.
259 static inline cudaError_t
hapiFreeHost(void* ptr
, bool pool
) {
260 return pool
? hapiFreeHostPool(ptr
) : hapiFreeHost(ptr
);
263 #endif /* defined __cplusplus */
265 #endif /* !defined AMPI_INTERNAL_SKIP_FUNCTIONS */