Daily bump.
[official-gcc.git] / include / cuda / cuda.h
blob0dca4b3a5c0b055dcc13ceef7b2559c22fd73a36
/* CUDA Driver API description.
   Copyright (C) 2017-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.

This header provides parts of the CUDA Driver API, without having to rely on
the proprietary CUDA toolkit.  */
28 #ifndef GCC_CUDA_H
29 #define GCC_CUDA_H
31 #include <stdlib.h>
33 #define CUDA_VERSION 8000
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
/* Basic scalar and handle types used by the declarations in this header.
   All handle types are plain 'void *' here, so the compiler does not
   distinguish one kind of handle from another.  */
typedef void *CUcontext;

/* Devices are identified by a plain integer.  */
typedef int CUdevice;

/* Device pointers are an unsigned integer type: 'unsigned long long' on
   LP64 and Win64 targets, plain 'unsigned' elsewhere.  */
#if defined(__LP64__) || defined(_WIN64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned CUdeviceptr;
#endif

typedef void *CUevent;
typedef void *CUfunction;
typedef void *CUlinkState;
typedef void *CUmodule;
typedef void *CUarray;

/* Callback type taking an int and returning a size_t; it is the fourth
   parameter of cuOccupancyMaxPotentialBlockSize.  */
typedef size_t (*CUoccupancyB2DSize)(int);

typedef void *CUstream;
/* Status code type returned by every cu* function declared in this
   header.  Each enumerator is given an explicit numeric value;
   presumably these have to match the values the real driver library
   uses -- confirm against the CUDA Driver API documentation before
   changing any of them.  */
typedef enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_OUT_OF_MEMORY = 2,
  CUDA_ERROR_NOT_INITIALIZED = 3,
  CUDA_ERROR_DEINITIALIZED = 4,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_CONTEXT = 201,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_FOUND = 500,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_LAUNCH_FAILED = 719,
  CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
  CUDA_ERROR_NOT_PERMITTED = 800,
  CUDA_ERROR_NOT_SUPPORTED = 801,
  CUDA_ERROR_UNKNOWN = 999
} CUresult;
/* Selector for a per-device property; it is the second parameter of
   cuDeviceGetAttribute.  Only a subset of values is listed, each with
   an explicit number (the numbering is intentionally sparse).  */
typedef enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
} CUdevice_attribute;
/* Event flag constants.  Presumably intended for the 'unsigned' flags
   parameter of cuEventCreate -- confirm against the driver API
   documentation.  */
enum {
  CU_EVENT_DEFAULT = 0,
  CU_EVENT_DISABLE_TIMING = 2
};
/* Selector for a per-function property; it is the second parameter of
   cuFuncGetAttribute.  */
typedef enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
  CU_FUNC_ATTRIBUTE_NUM_REGS = 4
} CUfunction_attribute;
/* Option keys for the JIT/linker interface; cuLinkCreate and
   cuLinkAddData each take a 'CUjit_option *' parameter.  The last
   enumerator carries no trailing comma so that the header is also
   accepted by strict C90 and C++98 compilers.  */
typedef enum {
  CU_JIT_WALL_TIME = 2,
  CU_JIT_INFO_LOG_BUFFER = 3,
  CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
  CU_JIT_ERROR_LOG_BUFFER = 5,
  CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
  CU_JIT_OPTIMIZATION_LEVEL = 7,
  CU_JIT_GENERATE_DEBUG_INFO = 11,
  CU_JIT_LOG_VERBOSE = 12,
  CU_JIT_GENERATE_LINE_INFO = 13
} CUjit_option;
/* Kind of input handed to the linker; it is the second parameter of
   cuLinkAddData.  PTX is the only kind declared here.  */
typedef enum {
  CU_JIT_INPUT_PTX = 1
} CUjitInputType;
/* Context flag constant.  Presumably intended for the 'unsigned' flags
   parameter of cuCtxCreate -- confirm against the driver API
   documentation.  */
enum {
  CU_CTX_SCHED_AUTO = 0
};
/* Small integers cast to 'void *', used as marker values.  Presumably
   they go into the last 'void **' argument of cuLaunchKernel -- confirm
   against the driver API documentation.  */
#define CU_LAUNCH_PARAM_END ((void *) 0)
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)

/* Unsigned flag bit.  Presumably for the 'unsigned int' flags parameter
   of cuMemHostAlloc -- confirm.  */
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
/* Stream flag constants.  Presumably intended for the 'unsigned' flags
   parameter of cuStreamCreate -- confirm against the driver API
   documentation.  */
enum {
  CU_STREAM_DEFAULT = 0,
  CU_STREAM_NON_BLOCKING = 1
};
/* Selector for a context limit; it is the first parameter of
   cuCtxSetLimit.  The last enumerator carries no trailing comma so that
   the header is also accepted by strict C90 and C++98 compilers.  */
typedef enum {
  CU_LIMIT_STACK_SIZE = 0x00,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x02
} CUlimit;
/* Memory kind tag; it is the type of the srcMemoryType and
   dstMemoryType fields of the CUDA_MEMCPY* descriptor structures.  */
typedef enum {
  CU_MEMORYTYPE_HOST = 0x01,
  CU_MEMORYTYPE_DEVICE = 0x02,
  CU_MEMORYTYPE_ARRAY = 0x03,
  CU_MEMORYTYPE_UNIFIED = 0x04
} CUmemorytype;
141 typedef struct {
142 size_t srcXInBytes, srcY;
143 CUmemorytype srcMemoryType;
144 const void *srcHost;
145 CUdeviceptr srcDevice;
146 CUarray srcArray;
147 size_t srcPitch;
149 size_t dstXInBytes, dstY;
150 CUmemorytype dstMemoryType;
151 void *dstHost;
152 CUdeviceptr dstDevice;
153 CUarray dstArray;
154 size_t dstPitch;
156 size_t WidthInBytes, Height;
157 } CUDA_MEMCPY2D;
159 typedef struct {
160 size_t srcXInBytes, srcY, srcZ;
161 size_t srcLOD;
162 CUmemorytype srcMemoryType;
163 const void *srcHost;
164 CUdeviceptr srcDevice;
165 CUarray srcArray;
166 void *reserved0;
167 size_t srcPitch, srcHeight;
169 size_t dstXInBytes, dstY, dstZ;
170 size_t dstLOD;
171 CUmemorytype dstMemoryType;
172 void *dstHost;
173 CUdeviceptr dstDevice;
174 CUarray dstArray;
175 void *reserved1;
176 size_t dstPitch, dstHeight;
178 size_t WidthInBytes, Height, Depth;
179 } CUDA_MEMCPY3D;
181 typedef struct {
182 size_t srcXInBytes, srcY, srcZ;
183 size_t srcLOD;
184 CUmemorytype srcMemoryType;
185 const void *srcHost;
186 CUdeviceptr srcDevice;
187 CUarray srcArray;
188 CUcontext srcContext;
189 size_t srcPitch, srcHeight;
191 size_t dstXInBytes, dstY, dstZ;
192 size_t dstLOD;
193 CUmemorytype dstMemoryType;
194 void *dstHost;
195 CUdeviceptr dstDevice;
196 CUarray dstArray;
197 CUcontext dstContext;
198 size_t dstPitch, dstHeight;
200 size_t WidthInBytes, Height, Depth;
201 } CUDA_MEMCPY3D_PEER;
203 #define cuCtxCreate cuCtxCreate_v2
204 CUresult cuCtxCreate (CUcontext *, unsigned, CUdevice);
205 #define cuCtxDestroy cuCtxDestroy_v2
206 CUresult cuCtxDestroy (CUcontext);
207 CUresult cuCtxGetCurrent (CUcontext *);
208 CUresult cuCtxGetDevice (CUdevice *);
209 #define cuCtxPopCurrent cuCtxPopCurrent_v2
210 CUresult cuCtxPopCurrent (CUcontext *);
211 #define cuCtxPushCurrent cuCtxPushCurrent_v2
212 CUresult cuCtxPushCurrent (CUcontext);
213 CUresult cuCtxSynchronize (void);
214 CUresult cuCtxSetLimit (CUlimit, size_t);
215 CUresult cuDeviceGet (CUdevice *, int);
216 #define cuDeviceTotalMem cuDeviceTotalMem_v2
217 CUresult cuDeviceTotalMem (size_t *, CUdevice);
218 CUresult cuDeviceGetAttribute (int *, CUdevice_attribute, CUdevice);
219 CUresult cuDeviceGetCount (int *);
220 CUresult cuDeviceGetName (char *, int, CUdevice);
221 CUresult cuEventCreate (CUevent *, unsigned);
222 #define cuEventDestroy cuEventDestroy_v2
223 CUresult cuEventDestroy (CUevent);
224 CUresult cuEventElapsedTime (float *, CUevent, CUevent);
225 CUresult cuEventQuery (CUevent);
226 CUresult cuEventRecord (CUevent, CUstream);
227 CUresult cuEventSynchronize (CUevent);
228 CUresult cuFuncGetAttribute (int *, CUfunction_attribute, CUfunction);
229 CUresult cuGetErrorString (CUresult, const char **);
230 CUresult cuGetErrorName (CUresult, const char **);
231 CUresult cuInit (unsigned);
232 CUresult cuDriverGetVersion (int *);
233 CUresult cuLaunchKernel (CUfunction, unsigned, unsigned, unsigned, unsigned,
234 unsigned, unsigned, unsigned, CUstream, void **, void **);
235 #define cuLinkAddData cuLinkAddData_v2
236 CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t, const char *,
237 unsigned, CUjit_option *, void **);
238 CUresult cuLinkComplete (CUlinkState, void **, size_t *);
239 #define cuLinkCreate cuLinkCreate_v2
240 CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
241 CUresult cuLinkDestroy (CUlinkState);
242 #define cuMemGetInfo cuMemGetInfo_v2
243 CUresult cuMemGetInfo (size_t *, size_t *);
244 #define cuMemAlloc cuMemAlloc_v2
245 CUresult cuMemAlloc (CUdeviceptr *, size_t);
246 #define cuMemAllocHost cuMemAllocHost_v2
247 CUresult cuMemAllocHost (void **, size_t);
248 CUresult cuMemHostAlloc (void **, size_t, unsigned int);
249 CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
250 CUresult cuMemcpyPeer (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t);
251 CUresult cuMemcpyPeerAsync (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t, CUstream);
252 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
253 CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
254 #define cuMemcpyDtoH cuMemcpyDtoH_v2
255 CUresult cuMemcpyDtoH (void *, CUdeviceptr, size_t);
256 #define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
257 CUresult cuMemcpyDtoHAsync (void *, CUdeviceptr, size_t, CUstream);
258 #define cuMemcpyHtoD cuMemcpyHtoD_v2
259 CUresult cuMemcpyHtoD (CUdeviceptr, const void *, size_t);
260 #define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
261 CUresult cuMemcpyHtoDAsync (CUdeviceptr, const void *, size_t, CUstream);
262 #define cuMemcpy2D cuMemcpy2D_v2
263 CUresult cuMemcpy2D (const CUDA_MEMCPY2D *);
264 #define cuMemcpy2DAsync cuMemcpy2DAsync_v2
265 CUresult cuMemcpy2DAsync (const CUDA_MEMCPY2D *, CUstream);
266 #define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
267 CUresult cuMemcpy2DUnaligned (const CUDA_MEMCPY2D *);
268 #define cuMemcpy3D cuMemcpy3D_v2
269 CUresult cuMemcpy3D (const CUDA_MEMCPY3D *);
270 #define cuMemcpy3DAsync cuMemcpy3DAsync_v2
271 CUresult cuMemcpy3DAsync (const CUDA_MEMCPY3D *, CUstream);
272 CUresult cuMemcpy3DPeer (const CUDA_MEMCPY3D_PEER *);
273 CUresult cuMemcpy3DPeerAsync (const CUDA_MEMCPY3D_PEER *, CUstream);
274 #define cuMemFree cuMemFree_v2
275 CUresult cuMemFree (CUdeviceptr);
276 CUresult cuMemFreeHost (void *);
277 #define cuMemGetAddressRange cuMemGetAddressRange_v2
278 CUresult cuMemGetAddressRange (CUdeviceptr *, size_t *, CUdeviceptr);
279 #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
280 CUresult cuMemHostGetDevicePointer (CUdeviceptr *, void *, unsigned);
281 CUresult cuModuleGetFunction (CUfunction *, CUmodule, const char *);
282 #define cuModuleGetGlobal cuModuleGetGlobal_v2
283 CUresult cuModuleGetGlobal (CUdeviceptr *, size_t *, CUmodule, const char *);
284 CUresult cuModuleLoad (CUmodule *, const char *);
285 CUresult cuModuleLoadData (CUmodule *, const void *);
286 CUresult cuModuleUnload (CUmodule);
287 CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
288 CUoccupancyB2DSize, size_t, int);
289 typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
290 CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
291 CUresult cuStreamCreate (CUstream *, unsigned);
292 #define cuStreamDestroy cuStreamDestroy_v2
293 CUresult cuStreamDestroy (CUstream);
294 CUresult cuStreamQuery (CUstream);
295 CUresult cuStreamSynchronize (CUstream);
296 CUresult cuStreamWaitEvent (CUstream, CUevent, unsigned);
298 #ifdef __cplusplus
300 #endif
302 #endif /* GCC_CUDA_H */