1 //===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// The parts of the CUDA API that are presently in use by the OpenMP CUDA plugin.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
14 #define DYNAMIC_CUDA_CUDA_H_INCLUDED
15 
16 #include <cstddef>
17 #include <cstdint>
18 
// Opaque driver-side objects. Only pointers to these structs are ever used by
// this header, so the structs are declared but never defined here.
struct CUmod_st;
struct CUctx_st;
struct CUfunc_st;
struct CUstream_st;
struct CUevent_st;

// Integral handle types.
typedef int CUdevice;
typedef uintptr_t CUdeviceptr;

// Pointer handle types, one per opaque struct above.
typedef CUmod_st *CUmodule;
typedef CUctx_st *CUcontext;
typedef CUfunc_st *CUfunction;
typedef CUstream_st *CUstream;
typedef CUevent_st *CUevent;
26 
// Status code type returned by the driver entry points declared in this
// header. Only the codes listed here are given names.
typedef enum cudaError_enum {
  CUDA_SUCCESS              = 0,
  CUDA_ERROR_INVALID_VALUE  = 1,
  CUDA_ERROR_NO_DEVICE      = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
} CUresult;
33 
// Flag values for stream creation.
typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0,
  CU_STREAM_NON_BLOCKING = 1 << 0,
} CUstream_flags;
38 
// Selectors for the per-context limits read and written by cuCtxGetLimit()
// and cuCtxSetLimit().
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6,
  // One past the last named limit (implicitly 7).
  CU_LIMIT_MAX
} CUlimit;
49 
// Selectors accepted by cuDeviceGetAttribute(). A few names are aliases; they
// are written in terms of the enumerator whose numeric value they share.
typedef enum CUdevice_attribute_enum {
  // 1 - 20
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK =
      CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK =
      CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,

  // 21 - 40
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES =
      CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS,
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,

  // 41 - 60
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,

  // 61 - 80
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,

  // 81 - 100
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,

  // 101 - 119
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED =
      CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,

  // One past the last explicitly numbered attribute (implicitly 120). Must
  // stay directly after the enumerator with value 119.
  CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;
178 
// Selectors accepted by cuFuncGetAttribute(); only one is named here.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0
} CUfunction_attribute;
182 
// Context flag constants: one scheduling flag plus the mask covering the
// three low bits.
typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 4, // 0x04
  CU_CTX_SCHED_MASK = 7,          // 0x07
} CUctx_flags;
187 
// Attach flags; each value occupies its own bit.
typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 1 << 0,
  CU_MEM_ATTACH_HOST = 1 << 1,
  CU_MEM_ATTACH_SINGLE = 1 << 2,
} CUmemAttach_flags;
193 
// Compute-mode values. Note that the value 1 is not given a name here.
typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT           = 0,
  CU_COMPUTEMODE_PROHIBITED        = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;
199 
// Flag values for event creation; every non-default value occupies its own
// bit.
typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0,
  CU_EVENT_BLOCKING_SYNC = 1 << 0,
  CU_EVENT_DISABLE_TIMING = 1 << 1,
  CU_EVENT_INTERPROCESS = 1 << 2
} CUevent_flags;
206 
207 CUresult cuCtxGetDevice(CUdevice *);
208 CUresult cuDeviceGet(CUdevice *, int);
209 CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
210 CUresult cuDeviceGetCount(int *);
211 CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);
212 
213 // Device info
214 CUresult cuDeviceGetName(char *, int, CUdevice);
215 CUresult cuDeviceTotalMem(size_t *, CUdevice);
216 CUresult cuDriverGetVersion(int *);
217 
218 CUresult cuGetErrorString(CUresult, const char **);
219 CUresult cuInit(unsigned);
220 CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
221                         unsigned, unsigned, unsigned, CUstream, void **,
222                         void **);
223 
224 CUresult cuMemAlloc(CUdeviceptr *, size_t);
225 CUresult cuMemAllocHost(void **, size_t);
226 CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
227 
228 CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
229 CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
230 CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
231 CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
232 CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);
233 
234 CUresult cuMemFree(CUdeviceptr);
235 CUresult cuMemFreeHost(void *);
236 
237 CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
238 CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);
239 
240 CUresult cuModuleUnload(CUmodule);
241 CUresult cuStreamCreate(CUstream *, unsigned);
242 CUresult cuStreamDestroy(CUstream);
243 CUresult cuStreamSynchronize(CUstream);
244 CUresult cuCtxSetCurrent(CUcontext);
245 CUresult cuDevicePrimaryCtxRelease(CUdevice);
246 CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
247 CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
248 CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
249 CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
250                             void **);
251 
252 CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
253 CUresult cuCtxEnablePeerAccess(CUcontext, unsigned);
254 CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
255                            size_t, CUstream);
256 
257 CUresult cuCtxGetLimit(size_t *, CUlimit);
258 CUresult cuCtxSetLimit(CUlimit, size_t);
259 
260 CUresult cuEventCreate(CUevent *, unsigned int);
261 CUresult cuEventRecord(CUevent, CUstream);
262 CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
263 CUresult cuEventSynchronize(CUevent);
264 CUresult cuEventDestroy(CUevent);
265 
266 #endif
267