1 //===--- cuda/dynamic_cuda/cuda.cpp ------------------------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a subset of the CUDA driver API by calling into the CUDA library
10 // via dlopen. The dlopen/dlsym calls happen inside the call to cuInit.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "cuda.h"
15 #include "Debug.h"
16 #include "dlwrap.h"
17 
18 #include <string>
19 #include <unordered_map>
20 
21 #include <dlfcn.h>
22 
// Table of CUDA entry points that this file binds at run time. checkForCUDA()
// walks dlwrap::size() / dlwrap::symbol() / dlwrap::pointer() and fills each
// slot with the address returned by dlsym.
// NOTE(review): presumably every DLWRAP / DLWRAP_INTERNAL line between
// DLWRAP_INITIALIZE and DLWRAP_FINALIZE adds one entry to that table, and the
// second macro argument is the wrapped function's parameter count -- confirm
// against dlwrap.h.
23 DLWRAP_INITIALIZE();
24 
// cuInit is the only entry registered with DLWRAP_INTERNAL: the public cuInit
// is written by hand in this file and forwards to dlwrap_cuInit (presumably
// the name this macro generates -- confirm against dlwrap.h).
25 DLWRAP_INTERNAL(cuInit, 1);
26 
// Context, device enumeration and attribute queries
27 DLWRAP(cuCtxGetDevice, 1);
28 DLWRAP(cuDeviceGet, 2);
29 DLWRAP(cuDeviceGetAttribute, 3);
30 DLWRAP(cuDeviceGetCount, 1);
31 DLWRAP(cuFuncGetAttribute, 3);
32 
33 // Device info
34 DLWRAP(cuDeviceGetName, 3);
35 DLWRAP(cuDeviceTotalMem, 2);
36 DLWRAP(cuDriverGetVersion, 1);
37 
// Error strings and kernel launch
38 DLWRAP(cuGetErrorString, 2);
39 DLWRAP(cuLaunchKernel, 11);
40 
// Memory allocation
41 DLWRAP(cuMemAlloc, 2);
42 DLWRAP(cuMemAllocHost, 2);
43 DLWRAP(cuMemAllocManaged, 3);
44 
// Memory copies, blocking and asynchronous
45 DLWRAP(cuMemcpyDtoDAsync, 4);
46 DLWRAP(cuMemcpyDtoH, 3);
47 DLWRAP(cuMemcpyDtoHAsync, 4);
48 DLWRAP(cuMemcpyHtoD, 3);
49 DLWRAP(cuMemcpyHtoDAsync, 4);
50 
// Memory release and module symbol lookup
51 DLWRAP(cuMemFree, 1);
52 DLWRAP(cuMemFreeHost, 1);
53 DLWRAP(cuModuleGetFunction, 3);
54 DLWRAP(cuModuleGetGlobal, 4);
55 
// Module, stream and primary-context management
56 DLWRAP(cuModuleUnload, 1);
57 DLWRAP(cuStreamCreate, 2);
58 DLWRAP(cuStreamDestroy, 1);
59 DLWRAP(cuStreamSynchronize, 1);
60 DLWRAP(cuCtxSetCurrent, 1);
61 DLWRAP(cuDevicePrimaryCtxRelease, 1);
62 DLWRAP(cuDevicePrimaryCtxGetState, 3);
63 DLWRAP(cuDevicePrimaryCtxSetFlags, 2);
64 DLWRAP(cuDevicePrimaryCtxRetain, 2);
65 DLWRAP(cuModuleLoadDataEx, 5);
66 
// Peer-to-peer access and copies
67 DLWRAP(cuDeviceCanAccessPeer, 3);
68 DLWRAP(cuCtxEnablePeerAccess, 2);
69 DLWRAP(cuMemcpyPeerAsync, 6);
70 
// Context limits
71 DLWRAP(cuCtxGetLimit, 2);
72 DLWRAP(cuCtxSetLimit, 2);
73 
// Events
74 DLWRAP(cuEventCreate, 2);
75 DLWRAP(cuEventRecord, 2);
76 DLWRAP(cuStreamWaitEvent, 3);
77 DLWRAP(cuEventSynchronize, 1);
78 DLWRAP(cuEventDestroy, 1);
79 
// NOTE(review): presumably closes the table opened by DLWRAP_INITIALIZE; no
// DLWRAP line may follow it -- confirm against dlwrap.h.
80 DLWRAP_FINALIZE();
81 
// Library name or path that checkForCUDA() hands to dlopen. The #ifndef guard
// lets a definition supplied before this point (for example by the build)
// replace the default "libcuda.so".
82 #ifndef DYNAMIC_CUDA_PATH
83 #define DYNAMIC_CUDA_PATH "libcuda.so"
84 #endif
85 
// NOTE(review): TARGET_NAME and DEBUG_PREFIX are presumably consumed by the DP
// debug-print macro used in this file; GETNAME is not defined in the visible
// part of this file -- confirm against Debug.h.
86 #define TARGET_NAME CUDA
87 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
88 
checkForCUDA()89 static bool checkForCUDA() {
90   // return true if dlopen succeeded and all functions found
91 
92   // Prefer _v2 versions of functions if found in the library
93   std::unordered_map<std::string, const char *> TryFirst = {
94       {"cuMemAlloc", "cuMemAlloc_v2"},
95       {"cuMemFree", "cuMemFree_v2"},
96       {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
97       {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
98       {"cuStreamDestroy", "cuStreamDestroy_v2"},
99       {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
100       {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
101       {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
102       {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
103       {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
104       {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
105   };
106 
107   const char *CudaLib = DYNAMIC_CUDA_PATH;
108   void *DynlibHandle = dlopen(CudaLib, RTLD_NOW);
109   if (!DynlibHandle) {
110     DP("Unable to load library '%s': %s!\n", CudaLib, dlerror());
111     return false;
112   }
113 
114   for (size_t I = 0; I < dlwrap::size(); I++) {
115     const char *Sym = dlwrap::symbol(I);
116 
117     auto It = TryFirst.find(Sym);
118     if (It != TryFirst.end()) {
119       const char *First = It->second;
120       void *P = dlsym(DynlibHandle, First);
121       if (P) {
122         DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
123         *dlwrap::pointer(I) = P;
124         continue;
125       }
126     }
127 
128     void *P = dlsym(DynlibHandle, Sym);
129     if (P == nullptr) {
130       DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
131       return false;
132     }
133     DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
134 
135     *dlwrap::pointer(I) = P;
136   }
137 
138   return true;
139 }
140 
cuInit(unsigned X)141 CUresult cuInit(unsigned X) {
142   // Note: Called exactly once from cuda rtl.cpp in a global constructor so
143   // does not need to handle being called repeatedly or concurrently
144   if (!checkForCUDA()) {
145     return CUDA_ERROR_INVALID_HANDLE;
146   }
147   return dlwrap_cuInit(X);
148 }
149