1 /* CUDA Driver API description.
2 Copyright (C) 2017-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>.
25 This header provides parts of the CUDA Driver API, without having to rely on
26 the proprietary CUDA toolkit. */
/* Version number advertised by this header.  NOTE(review): presumably 8.0
   in CUDA's 1000 * major + 10 * minor encoding -- confirm.
   The `#' has to be the first token on the line for the preprocessor to
   treat this as a directive.  */
#define CUDA_VERSION 8000
/* Opaque handle type: a plain `void *', so this header exposes nothing
   about the object a context handle refers to.  */
typedef void *CUcontext;
/* CUdeviceptr is an unsigned integer type, not a C pointer: `unsigned long
   long' when the compiler predefines __LP64__ or _WIN64, plain `unsigned'
   otherwise.  The two typedefs are mutually exclusive alternatives, so the
   `#else'/`#endif' pair is required; without it the same name would be
   defined twice with conflicting types.  */
#if defined(__LP64__) || defined(_WIN64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned CUdeviceptr;
#endif
/* Opaque handle types.  Each one is a plain `void *', so nothing about the
   referenced objects is exposed by this header.  */
typedef void *CUevent;
typedef void *CUfunction;
typedef void *CUlinkState;
typedef void *CUmodule;
typedef void *CUarray;

/* Pointer to a callback that takes an `int' and returns a `size_t'.  It is
   the type of the fourth parameter of cuOccupancyMaxPotentialBlockSize.  */
typedef size_t (*CUoccupancyB2DSize)(int);

typedef void *CUstream;
/* Enumerators with explicit numeric values (1 through 999 in the lines
   visible here).
   NOTE(review): neither the line that opens this enumerator list nor the
   one that closes and names it is visible in this excerpt; presumably these
   are the CUresult codes returned by the prototypes further down -- confirm
   against the complete header.  The leading numbers on each line look like
   residue from a code listing rather than C tokens.  */
56 CUDA_ERROR_INVALID_VALUE
= 1,
57 CUDA_ERROR_OUT_OF_MEMORY
= 2,
58 CUDA_ERROR_NOT_INITIALIZED
= 3,
59 CUDA_ERROR_DEINITIALIZED
= 4,
60 CUDA_ERROR_INVALID_CONTEXT
= 201,
61 CUDA_ERROR_INVALID_HANDLE
= 400,
62 CUDA_ERROR_NOT_FOUND
= 500,
63 CUDA_ERROR_NOT_READY
= 600,
64 CUDA_ERROR_LAUNCH_FAILED
= 719,
65 CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE
= 720,
66 CUDA_ERROR_NOT_PERMITTED
= 800,
67 CUDA_ERROR_NOT_SUPPORTED
= 801,
68 CUDA_ERROR_UNKNOWN
= 999
/* Enumerators with explicit, non-contiguous numeric values (1 through 82 in
   the lines visible here).
   NOTE(review): the opening line and the closing typedef name of this
   enumeration are not visible in this excerpt; presumably this is the
   CUdevice_attribute type taken by cuDeviceGetAttribute -- confirm against
   the complete header.  */
72 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK
= 1,
73 CU_DEVICE_ATTRIBUTE_WARP_SIZE
= 10,
74 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
= 12,
75 CU_DEVICE_ATTRIBUTE_CLOCK_RATE
= 13,
76 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
= 15,
77 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
= 16,
78 CU_DEVICE_ATTRIBUTE_INTEGRATED
= 18,
79 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY
= 19,
80 CU_DEVICE_ATTRIBUTE_COMPUTE_MODE
= 20,
81 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS
= 31,
82 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR
= 39,
83 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT
= 40,
84 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
= 41,
85 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
= 82
/* Single visible enumerator, with the explicit value 2.
   NOTE(review): the enclosing enumeration's opening and closing lines, and
   any sibling enumerators, are not visible in this excerpt -- confirm
   against the complete header.  */
90 CU_EVENT_DISABLE_TIMING
= 2
/* Attribute selector; it is the type of the second parameter of
   cuFuncGetAttribute.  The enumerator values are spelled out explicitly
   rather than left to implicit numbering.  */
typedef enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
  CU_FUNC_ATTRIBUTE_NUM_REGS = 4
} CUfunction_attribute;
/* Enumerators with explicit numeric values (3 through 13 in the lines
   visible here); the last one keeps a trailing comma.
   NOTE(review): the opening line and the closing typedef name of this
   enumeration are not visible in this excerpt; presumably this is the
   CUjit_option type taken by cuLinkCreate and cuLinkAddData -- confirm
   against the complete header.  */
100 CU_JIT_INFO_LOG_BUFFER
= 3,
101 CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
= 4,
102 CU_JIT_ERROR_LOG_BUFFER
= 5,
103 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
= 6,
104 CU_JIT_OPTIMIZATION_LEVEL
= 7,
105 CU_JIT_GENERATE_DEBUG_INFO
= 11,
106 CU_JIT_LOG_VERBOSE
= 12,
107 CU_JIT_GENERATE_LINE_INFO
= 13,
/* Single visible enumerator, with the explicit value 0.
   NOTE(review): the enclosing enumeration's opening and closing lines are
   not visible in this excerpt -- confirm against the complete header.  */
115 CU_CTX_SCHED_AUTO
= 0
/* Marker constants: the integers 0, 1 and 2 cast to `void *'.
   NOTE(review): presumably tags placed in the final `void **' argument of
   cuLaunchKernel -- confirm against the CUDA Driver API documentation.  */
#define CU_LAUNCH_PARAM_END ((void *) 0)
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)

/* Unsigned flag constant.  NOTE(review): presumably meant for the
   `unsigned int' argument of cuMemHostAlloc -- confirm.  */
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
/* Two enumerators with the explicit values 0 and 1.
   NOTE(review): the enclosing enumeration's opening and closing lines are
   not visible in this excerpt -- confirm against the complete header.  */
124 CU_STREAM_DEFAULT
= 0,
125 CU_STREAM_NON_BLOCKING
= 1
/* Two enumerators with the explicit values 0x00 and 0x02; the last one
   keeps a trailing comma.
   NOTE(review): the opening line and the closing typedef name are not
   visible in this excerpt; presumably this is the CUlimit type taken by
   cuCtxSetLimit -- confirm against the complete header.  */
129 CU_LIMIT_STACK_SIZE
= 0x00,
130 CU_LIMIT_MALLOC_HEAP_SIZE
= 0x02,
/* Four enumerators with the explicit values 0x01 through 0x04.
   NOTE(review): the opening line and the closing typedef name are not
   visible in this excerpt; presumably this is the CUmemorytype type used
   for the srcMemoryType/dstMemoryType members below -- confirm against the
   complete header.  */
134 CU_MEMORYTYPE_HOST
= 0x01,
135 CU_MEMORYTYPE_DEVICE
= 0x02,
136 CU_MEMORYTYPE_ARRAY
= 0x03,
137 CU_MEMORYTYPE_UNIFIED
= 0x04
/* Member declarations of a copy descriptor: source and destination offsets,
   memory-type selectors and CUdeviceptr values, followed by the extent
   (WidthInBytes, Height).
   NOTE(review): the struct's opening line and its closing typedef name are
   not visible here, and the leading listing numbers skip values between
   members (141, 142, 144, 148, ...), which suggests member lines are
   missing from this excerpt.  Presumably this is CUDA_MEMCPY2D -- confirm
   the full member list and order against the complete header before
   relying on this layout.  */
141 size_t srcXInBytes
, srcY
;
142 CUmemorytype srcMemoryType
;
144 CUdeviceptr srcDevice
;
148 size_t dstXInBytes
, dstY
;
149 CUmemorytype dstMemoryType
;
151 CUdeviceptr dstDevice
;
155 size_t WidthInBytes
, Height
;
/* Member declarations of a copy descriptor with a third dimension: source
   and destination offsets (including srcZ/dstZ), memory-type selectors,
   CUdeviceptr values, pitch/height pairs, and the extent (WidthInBytes,
   Height, Depth).
   NOTE(review): the struct's opening line and its closing typedef name are
   not visible here, and the leading listing numbers skip values between
   members (159, 161, 163, 166, ...), which suggests member lines are
   missing from this excerpt.  Presumably this is CUDA_MEMCPY3D -- confirm
   the full member list and order against the complete header before
   relying on this layout.  */
159 size_t srcXInBytes
, srcY
, srcZ
;
161 CUmemorytype srcMemoryType
;
163 CUdeviceptr srcDevice
;
166 size_t srcPitch
, srcHeight
;
168 size_t dstXInBytes
, dstY
, dstZ
;
170 CUmemorytype dstMemoryType
;
172 CUdeviceptr dstDevice
;
175 size_t dstPitch
, dstHeight
;
177 size_t WidthInBytes
, Height
, Depth
;
/* Member declarations closed as the type CUDA_MEMCPY3D_PEER, which is the
   parameter type of cuMemcpy3DPeer and cuMemcpy3DPeerAsync.  Visible
   members: source and destination offsets, memory-type selectors,
   CUdeviceptr values, a CUcontext per side, pitch/height pairs, and the
   extent (WidthInBytes, Height, Depth).
   NOTE(review): the struct's opening line is not visible here, and the
   leading listing numbers skip values between members (181, 183, 185,
   187, ...), which suggests member lines are missing from this excerpt.
   Confirm the full member list and order against the complete header
   before relying on this layout.  */
181 size_t srcXInBytes
, srcY
, srcZ
;
183 CUmemorytype srcMemoryType
;
185 CUdeviceptr srcDevice
;
187 CUcontext srcContext
;
188 size_t srcPitch
, srcHeight
;
190 size_t dstXInBytes
, dstY
, dstZ
;
192 CUmemorytype dstMemoryType
;
194 CUdeviceptr dstDevice
;
196 CUcontext dstContext
;
197 size_t dstPitch
, dstHeight
;
199 size_t WidthInBytes
, Height
, Depth
;
200 } CUDA_MEMCPY3D_PEER
;
202 #define cuCtxCreate cuCtxCreate_v2
203 CUresult
cuCtxCreate (CUcontext
*, unsigned, CUdevice
);
204 #define cuCtxDestroy cuCtxDestroy_v2
205 CUresult
cuCtxDestroy (CUcontext
);
206 CUresult
cuCtxGetCurrent (CUcontext
*);
207 CUresult
cuCtxGetDevice (CUdevice
*);
208 #define cuCtxPopCurrent cuCtxPopCurrent_v2
209 CUresult
cuCtxPopCurrent (CUcontext
*);
210 #define cuCtxPushCurrent cuCtxPushCurrent_v2
211 CUresult
cuCtxPushCurrent (CUcontext
);
212 CUresult
cuCtxSynchronize (void);
213 CUresult
cuCtxSetLimit (CUlimit
, size_t);
214 CUresult
cuDeviceGet (CUdevice
*, int);
215 #define cuDeviceTotalMem cuDeviceTotalMem_v2
216 CUresult
cuDeviceTotalMem (size_t *, CUdevice
);
217 CUresult
cuDeviceGetAttribute (int *, CUdevice_attribute
, CUdevice
);
218 CUresult
cuDeviceGetCount (int *);
219 CUresult
cuDeviceGetName (char *, int, CUdevice
);
220 CUresult
cuEventCreate (CUevent
*, unsigned);
221 #define cuEventDestroy cuEventDestroy_v2
222 CUresult
cuEventDestroy (CUevent
);
223 CUresult
cuEventElapsedTime (float *, CUevent
, CUevent
);
224 CUresult
cuEventQuery (CUevent
);
225 CUresult
cuEventRecord (CUevent
, CUstream
);
226 CUresult
cuEventSynchronize (CUevent
);
227 CUresult
cuFuncGetAttribute (int *, CUfunction_attribute
, CUfunction
);
228 CUresult
cuGetErrorString (CUresult
, const char **);
229 CUresult
cuGetErrorName (CUresult
, const char **);
230 CUresult
cuInit (unsigned);
231 CUresult
cuDriverGetVersion (int *);
232 CUresult
cuLaunchKernel (CUfunction
, unsigned, unsigned, unsigned, unsigned,
233 unsigned, unsigned, unsigned, CUstream
, void **, void **);
234 #define cuLinkAddData cuLinkAddData_v2
235 CUresult
cuLinkAddData (CUlinkState
, CUjitInputType
, void *, size_t, const char *,
236 unsigned, CUjit_option
*, void **);
237 CUresult
cuLinkComplete (CUlinkState
, void **, size_t *);
238 #define cuLinkCreate cuLinkCreate_v2
239 CUresult
cuLinkCreate (unsigned, CUjit_option
*, void **, CUlinkState
*);
240 CUresult
cuLinkDestroy (CUlinkState
);
241 #define cuMemGetInfo cuMemGetInfo_v2
242 CUresult
cuMemGetInfo (size_t *, size_t *);
243 #define cuMemAlloc cuMemAlloc_v2
244 CUresult
cuMemAlloc (CUdeviceptr
*, size_t);
245 #define cuMemAllocHost cuMemAllocHost_v2
246 CUresult
cuMemAllocHost (void **, size_t);
247 CUresult
cuMemHostAlloc (void **, size_t, unsigned int);
248 CUresult
cuMemcpy (CUdeviceptr
, CUdeviceptr
, size_t);
249 CUresult
cuMemcpyPeer (CUdeviceptr
, CUcontext
, CUdeviceptr
, CUcontext
, size_t);
250 CUresult
cuMemcpyPeerAsync (CUdeviceptr
, CUcontext
, CUdeviceptr
, CUcontext
, size_t, CUstream
);
251 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
252 CUresult
cuMemcpyDtoDAsync (CUdeviceptr
, CUdeviceptr
, size_t, CUstream
);
253 #define cuMemcpyDtoH cuMemcpyDtoH_v2
254 CUresult
cuMemcpyDtoH (void *, CUdeviceptr
, size_t);
255 #define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
256 CUresult
cuMemcpyDtoHAsync (void *, CUdeviceptr
, size_t, CUstream
);
257 #define cuMemcpyHtoD cuMemcpyHtoD_v2
258 CUresult
cuMemcpyHtoD (CUdeviceptr
, const void *, size_t);
259 #define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
260 CUresult
cuMemcpyHtoDAsync (CUdeviceptr
, const void *, size_t, CUstream
);
261 #define cuMemcpy2D cuMemcpy2D_v2
262 CUresult
cuMemcpy2D (const CUDA_MEMCPY2D
*);
263 #define cuMemcpy2DAsync cuMemcpy2DAsync_v2
264 CUresult
cuMemcpy2DAsync (const CUDA_MEMCPY2D
*, CUstream
);
265 #define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
266 CUresult
cuMemcpy2DUnaligned (const CUDA_MEMCPY2D
*);
267 #define cuMemcpy3D cuMemcpy3D_v2
268 CUresult
cuMemcpy3D (const CUDA_MEMCPY3D
*);
269 #define cuMemcpy3DAsync cuMemcpy3DAsync_v2
270 CUresult
cuMemcpy3DAsync (const CUDA_MEMCPY3D
*, CUstream
);
271 CUresult
cuMemcpy3DPeer (const CUDA_MEMCPY3D_PEER
*);
272 CUresult
cuMemcpy3DPeerAsync (const CUDA_MEMCPY3D_PEER
*, CUstream
);
273 #define cuMemFree cuMemFree_v2
274 CUresult
cuMemFree (CUdeviceptr
);
275 CUresult
cuMemFreeHost (void *);
276 #define cuMemGetAddressRange cuMemGetAddressRange_v2
277 CUresult
cuMemGetAddressRange (CUdeviceptr
*, size_t *, CUdeviceptr
);
278 #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
279 CUresult
cuMemHostGetDevicePointer (CUdeviceptr
*, void *, unsigned);
280 CUresult
cuModuleGetFunction (CUfunction
*, CUmodule
, const char *);
281 #define cuModuleGetGlobal cuModuleGetGlobal_v2
282 CUresult
cuModuleGetGlobal (CUdeviceptr
*, size_t *, CUmodule
, const char *);
283 CUresult
cuModuleLoad (CUmodule
*, const char *);
284 CUresult
cuModuleLoadData (CUmodule
*, const void *);
285 CUresult
cuModuleUnload (CUmodule
);
286 CUresult
cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction
,
287 CUoccupancyB2DSize
, size_t, int);
288 typedef void (*CUstreamCallback
)(CUstream
, CUresult
, void *);
289 CUresult
cuStreamAddCallback(CUstream
, CUstreamCallback
, void *, unsigned int);
290 CUresult
cuStreamCreate (CUstream
*, unsigned);
291 #define cuStreamDestroy cuStreamDestroy_v2
292 CUresult
cuStreamDestroy (CUstream
);
293 CUresult
cuStreamQuery (CUstream
);
294 CUresult
cuStreamSynchronize (CUstream
);
295 CUresult
cuStreamWaitEvent (CUstream
, CUevent
, unsigned);
301 #endif /* GCC_CUDA_H */