/* CUDA Driver API description.
   Copyright (C) 2017-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.

This header provides parts of the CUDA Driver API, without having to rely on
the proprietary CUDA toolkit.  */
/* Version number this header reports through CUDA_VERSION.
   NOTE(review): 8000 presumably encodes CUDA 8.0 -- confirm against the
   toolkit's numbering scheme.  */
#define CUDA_VERSION 8000
/* Driver object handles.  Each one is declared as an untyped pointer, so
   this header never needs the definition of the object behind it.  */
typedef void *CUcontext;

/* Integer type behind CUdeviceptr: unsigned long long when the target
   defines __LP64__ or _WIN64, plain unsigned otherwise.  The two typedefs
   are alternatives, so they must sit in opposite arms of the conditional;
   without the #else/#endif both would be seen on 64-bit targets and
   conflict.  */
#if defined(__LP64__) || defined(_WIN64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned CUdeviceptr;
#endif

typedef void *CUevent;
typedef void *CUfunction;
typedef void *CUlinkState;
typedef void *CUmodule;
typedef void *CUarray;

/* Pointer to a function taking an int and returning a size_t; it is the
   type of the fourth parameter of cuOccupancyMaxPotentialBlockSize.  */
typedef size_t (*CUoccupancyB2DSize) (int);

typedef void *CUstream;
56 CUDA_ERROR_INVALID_VALUE
= 1,
57 CUDA_ERROR_OUT_OF_MEMORY
= 2,
58 CUDA_ERROR_NOT_INITIALIZED
= 3,
59 CUDA_ERROR_DEINITIALIZED
= 4,
60 CUDA_ERROR_NO_DEVICE
= 100,
61 CUDA_ERROR_INVALID_CONTEXT
= 201,
62 CUDA_ERROR_INVALID_HANDLE
= 400,
63 CUDA_ERROR_NOT_FOUND
= 500,
64 CUDA_ERROR_NOT_READY
= 600,
65 CUDA_ERROR_LAUNCH_FAILED
= 719,
66 CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE
= 720,
67 CUDA_ERROR_NOT_PERMITTED
= 800,
68 CUDA_ERROR_NOT_SUPPORTED
= 801,
69 CUDA_ERROR_UNKNOWN
= 999
73 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK
= 1,
74 CU_DEVICE_ATTRIBUTE_WARP_SIZE
= 10,
75 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
= 12,
76 CU_DEVICE_ATTRIBUTE_CLOCK_RATE
= 13,
77 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
= 15,
78 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
= 16,
79 CU_DEVICE_ATTRIBUTE_INTEGRATED
= 18,
80 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY
= 19,
81 CU_DEVICE_ATTRIBUTE_COMPUTE_MODE
= 20,
82 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS
= 31,
83 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR
= 39,
84 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT
= 40,
85 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
= 41,
86 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
= 82
91 CU_EVENT_DISABLE_TIMING
= 2
/* Attribute selectors accepted by cuFuncGetAttribute.  The enumerator
   values are fixed explicitly; only the two listed here are provided by
   this header.  */
typedef enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
  CU_FUNC_ATTRIBUTE_NUM_REGS = 4
} CUfunction_attribute;
100 CU_JIT_WALL_TIME
= 2,
101 CU_JIT_INFO_LOG_BUFFER
= 3,
102 CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
= 4,
103 CU_JIT_ERROR_LOG_BUFFER
= 5,
104 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
= 6,
105 CU_JIT_OPTIMIZATION_LEVEL
= 7,
106 CU_JIT_GENERATE_DEBUG_INFO
= 11,
107 CU_JIT_LOG_VERBOSE
= 12,
108 CU_JIT_GENERATE_LINE_INFO
= 13,
116 CU_CTX_SCHED_AUTO
= 0
/* Pointer-valued marker constants: the integers 0, 1 and 2 cast to void *.
   NOTE(review): presumably these tag entries in the final void ** argument
   of cuLaunchKernel -- confirm against the Driver API documentation.  */
#define CU_LAUNCH_PARAM_END ((void *) 0)
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)
/* Unsigned flag value 0x02.
   NOTE(review): presumably meant for the unsigned int argument of
   cuMemHostAlloc -- confirm.  */
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
125 CU_STREAM_DEFAULT
= 0,
126 CU_STREAM_NON_BLOCKING
= 1
130 CU_LIMIT_STACK_SIZE
= 0x00,
131 CU_LIMIT_MALLOC_HEAP_SIZE
= 0x02,
135 CU_MEMORYTYPE_HOST
= 0x01,
136 CU_MEMORYTYPE_DEVICE
= 0x02,
137 CU_MEMORYTYPE_ARRAY
= 0x03,
138 CU_MEMORYTYPE_UNIFIED
= 0x04
142 size_t srcXInBytes
, srcY
;
143 CUmemorytype srcMemoryType
;
145 CUdeviceptr srcDevice
;
149 size_t dstXInBytes
, dstY
;
150 CUmemorytype dstMemoryType
;
152 CUdeviceptr dstDevice
;
156 size_t WidthInBytes
, Height
;
160 size_t srcXInBytes
, srcY
, srcZ
;
162 CUmemorytype srcMemoryType
;
164 CUdeviceptr srcDevice
;
167 size_t srcPitch
, srcHeight
;
169 size_t dstXInBytes
, dstY
, dstZ
;
171 CUmemorytype dstMemoryType
;
173 CUdeviceptr dstDevice
;
176 size_t dstPitch
, dstHeight
;
178 size_t WidthInBytes
, Height
, Depth
;
182 size_t srcXInBytes
, srcY
, srcZ
;
184 CUmemorytype srcMemoryType
;
186 CUdeviceptr srcDevice
;
188 CUcontext srcContext
;
189 size_t srcPitch
, srcHeight
;
191 size_t dstXInBytes
, dstY
, dstZ
;
193 CUmemorytype dstMemoryType
;
195 CUdeviceptr dstDevice
;
197 CUcontext dstContext
;
198 size_t dstPitch
, dstHeight
;
200 size_t WidthInBytes
, Height
, Depth
;
201 } CUDA_MEMCPY3D_PEER
;
203 #define cuCtxCreate cuCtxCreate_v2
204 CUresult
cuCtxCreate (CUcontext
*, unsigned, CUdevice
);
205 #define cuCtxDestroy cuCtxDestroy_v2
206 CUresult
cuCtxDestroy (CUcontext
);
207 CUresult
cuCtxGetCurrent (CUcontext
*);
208 CUresult
cuCtxGetDevice (CUdevice
*);
209 #define cuCtxPopCurrent cuCtxPopCurrent_v2
210 CUresult
cuCtxPopCurrent (CUcontext
*);
211 #define cuCtxPushCurrent cuCtxPushCurrent_v2
212 CUresult
cuCtxPushCurrent (CUcontext
);
213 CUresult
cuCtxSynchronize (void);
214 CUresult
cuCtxSetLimit (CUlimit
, size_t);
215 CUresult
cuDeviceGet (CUdevice
*, int);
216 #define cuDeviceTotalMem cuDeviceTotalMem_v2
217 CUresult
cuDeviceTotalMem (size_t *, CUdevice
);
218 CUresult
cuDeviceGetAttribute (int *, CUdevice_attribute
, CUdevice
);
219 CUresult
cuDeviceGetCount (int *);
220 CUresult
cuDeviceGetName (char *, int, CUdevice
);
221 CUresult
cuEventCreate (CUevent
*, unsigned);
222 #define cuEventDestroy cuEventDestroy_v2
223 CUresult
cuEventDestroy (CUevent
);
224 CUresult
cuEventElapsedTime (float *, CUevent
, CUevent
);
225 CUresult
cuEventQuery (CUevent
);
226 CUresult
cuEventRecord (CUevent
, CUstream
);
227 CUresult
cuEventSynchronize (CUevent
);
228 CUresult
cuFuncGetAttribute (int *, CUfunction_attribute
, CUfunction
);
229 CUresult
cuGetErrorString (CUresult
, const char **);
230 CUresult
cuGetErrorName (CUresult
, const char **);
231 CUresult
cuInit (unsigned);
232 CUresult
cuDriverGetVersion (int *);
233 CUresult
cuLaunchKernel (CUfunction
, unsigned, unsigned, unsigned, unsigned,
234 unsigned, unsigned, unsigned, CUstream
, void **, void **);
235 #define cuLinkAddData cuLinkAddData_v2
236 CUresult
cuLinkAddData (CUlinkState
, CUjitInputType
, void *, size_t, const char *,
237 unsigned, CUjit_option
*, void **);
238 CUresult
cuLinkComplete (CUlinkState
, void **, size_t *);
239 #define cuLinkCreate cuLinkCreate_v2
240 CUresult
cuLinkCreate (unsigned, CUjit_option
*, void **, CUlinkState
*);
241 CUresult
cuLinkDestroy (CUlinkState
);
242 #define cuMemGetInfo cuMemGetInfo_v2
243 CUresult
cuMemGetInfo (size_t *, size_t *);
244 #define cuMemAlloc cuMemAlloc_v2
245 CUresult
cuMemAlloc (CUdeviceptr
*, size_t);
246 #define cuMemAllocHost cuMemAllocHost_v2
247 CUresult
cuMemAllocHost (void **, size_t);
248 CUresult
cuMemHostAlloc (void **, size_t, unsigned int);
249 CUresult
cuMemcpy (CUdeviceptr
, CUdeviceptr
, size_t);
250 CUresult
cuMemcpyPeer (CUdeviceptr
, CUcontext
, CUdeviceptr
, CUcontext
, size_t);
251 CUresult
cuMemcpyPeerAsync (CUdeviceptr
, CUcontext
, CUdeviceptr
, CUcontext
, size_t, CUstream
);
252 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
253 CUresult
cuMemcpyDtoDAsync (CUdeviceptr
, CUdeviceptr
, size_t, CUstream
);
254 #define cuMemcpyDtoH cuMemcpyDtoH_v2
255 CUresult
cuMemcpyDtoH (void *, CUdeviceptr
, size_t);
256 #define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
257 CUresult
cuMemcpyDtoHAsync (void *, CUdeviceptr
, size_t, CUstream
);
258 #define cuMemcpyHtoD cuMemcpyHtoD_v2
259 CUresult
cuMemcpyHtoD (CUdeviceptr
, const void *, size_t);
260 #define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
261 CUresult
cuMemcpyHtoDAsync (CUdeviceptr
, const void *, size_t, CUstream
);
262 #define cuMemcpy2D cuMemcpy2D_v2
263 CUresult
cuMemcpy2D (const CUDA_MEMCPY2D
*);
264 #define cuMemcpy2DAsync cuMemcpy2DAsync_v2
265 CUresult
cuMemcpy2DAsync (const CUDA_MEMCPY2D
*, CUstream
);
266 #define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
267 CUresult
cuMemcpy2DUnaligned (const CUDA_MEMCPY2D
*);
268 #define cuMemcpy3D cuMemcpy3D_v2
269 CUresult
cuMemcpy3D (const CUDA_MEMCPY3D
*);
270 #define cuMemcpy3DAsync cuMemcpy3DAsync_v2
271 CUresult
cuMemcpy3DAsync (const CUDA_MEMCPY3D
*, CUstream
);
272 CUresult
cuMemcpy3DPeer (const CUDA_MEMCPY3D_PEER
*);
273 CUresult
cuMemcpy3DPeerAsync (const CUDA_MEMCPY3D_PEER
*, CUstream
);
274 #define cuMemFree cuMemFree_v2
275 CUresult
cuMemFree (CUdeviceptr
);
276 CUresult
cuMemFreeHost (void *);
277 #define cuMemGetAddressRange cuMemGetAddressRange_v2
278 CUresult
cuMemGetAddressRange (CUdeviceptr
*, size_t *, CUdeviceptr
);
279 #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
280 CUresult
cuMemHostGetDevicePointer (CUdeviceptr
*, void *, unsigned);
281 CUresult
cuModuleGetFunction (CUfunction
*, CUmodule
, const char *);
282 #define cuModuleGetGlobal cuModuleGetGlobal_v2
283 CUresult
cuModuleGetGlobal (CUdeviceptr
*, size_t *, CUmodule
, const char *);
284 CUresult
cuModuleLoad (CUmodule
*, const char *);
285 CUresult
cuModuleLoadData (CUmodule
*, const void *);
286 CUresult
cuModuleUnload (CUmodule
);
287 CUresult
cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction
,
288 CUoccupancyB2DSize
, size_t, int);
289 typedef void (*CUstreamCallback
)(CUstream
, CUresult
, void *);
290 CUresult
cuStreamAddCallback(CUstream
, CUstreamCallback
, void *, unsigned int);
291 CUresult
cuStreamCreate (CUstream
*, unsigned);
292 #define cuStreamDestroy cuStreamDestroy_v2
293 CUresult
cuStreamDestroy (CUstream
);
294 CUresult
cuStreamQuery (CUstream
);
295 CUresult
cuStreamSynchronize (CUstream
);
296 CUresult
cuStreamWaitEvent (CUstream
, CUevent
, unsigned);
302 #endif /* GCC_CUDA_H */