1 //===--- cuda/dynamic_cuda/cuda.pp ------------------------------- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implement subset of cuda api by calling into cuda library via dlopen 10 // Does the dlopen/dlsym calls as part of the call to cuInit 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "cuda.h" 15 #include "Debug.h" 16 #include "dlwrap.h" 17 18 #include <string> 19 #include <unordered_map> 20 21 #include <dlfcn.h> 22 23 DLWRAP_INITIALIZE(); 24 25 DLWRAP_INTERNAL(cuInit, 1); 26 27 DLWRAP(cuCtxGetDevice, 1); 28 DLWRAP(cuDeviceGet, 2); 29 DLWRAP(cuDeviceGetAttribute, 3); 30 DLWRAP(cuDeviceGetCount, 1); 31 DLWRAP(cuFuncGetAttribute, 3); 32 33 // Device info 34 DLWRAP(cuDeviceGetName, 3); 35 DLWRAP(cuDeviceTotalMem, 2); 36 DLWRAP(cuDriverGetVersion, 1); 37 38 DLWRAP(cuGetErrorString, 2); 39 DLWRAP(cuLaunchKernel, 11); 40 41 DLWRAP(cuMemAlloc, 2); 42 DLWRAP(cuMemAllocHost, 2); 43 DLWRAP(cuMemAllocManaged, 3); 44 45 DLWRAP(cuMemcpyDtoDAsync, 4); 46 DLWRAP(cuMemcpyDtoH, 3); 47 DLWRAP(cuMemcpyDtoHAsync, 4); 48 DLWRAP(cuMemcpyHtoD, 3); 49 DLWRAP(cuMemcpyHtoDAsync, 4); 50 51 DLWRAP(cuMemFree, 1); 52 DLWRAP(cuMemFreeHost, 1); 53 DLWRAP(cuModuleGetFunction, 3); 54 DLWRAP(cuModuleGetGlobal, 4); 55 56 DLWRAP(cuModuleUnload, 1); 57 DLWRAP(cuStreamCreate, 2); 58 DLWRAP(cuStreamDestroy, 1); 59 DLWRAP(cuStreamSynchronize, 1); 60 DLWRAP(cuCtxSetCurrent, 1); 61 DLWRAP(cuDevicePrimaryCtxRelease, 1); 62 DLWRAP(cuDevicePrimaryCtxGetState, 3); 63 DLWRAP(cuDevicePrimaryCtxSetFlags, 2); 64 DLWRAP(cuDevicePrimaryCtxRetain, 2); 65 DLWRAP(cuModuleLoadDataEx, 5); 66 67 DLWRAP(cuDeviceCanAccessPeer, 3); 68 DLWRAP(cuCtxEnablePeerAccess, 2); 69 DLWRAP(cuMemcpyPeerAsync, 6); 70 71 DLWRAP(cuCtxGetLimit, 2); 72 DLWRAP(cuCtxSetLimit, 2); 73 74 DLWRAP(cuEventCreate, 2); 75 DLWRAP(cuEventRecord, 2); 76 DLWRAP(cuStreamWaitEvent, 3); 77 DLWRAP(cuEventSynchronize, 1); 78 DLWRAP(cuEventDestroy, 1); 79 80 DLWRAP_FINALIZE(); 81 82 #ifndef DYNAMIC_CUDA_PATH 83 #define DYNAMIC_CUDA_PATH "libcuda.so" 84 #endif 85 86 #define TARGET_NAME CUDA 87 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL" 88 89 static bool checkForCUDA() { 90 // return true if dlopen succeeded and all functions found 91 92 // Prefer _v2 versions of functions if found in the library 93 std::unordered_map<std::string, const char *> TryFirst = { 94 {"cuMemAlloc", "cuMemAlloc_v2"}, 95 {"cuMemFree", "cuMemFree_v2"}, 96 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"}, 97 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"}, 98 {"cuStreamDestroy", "cuStreamDestroy_v2"}, 99 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"}, 100 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"}, 101 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"}, 102 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"}, 103 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"}, 104 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"}, 105 }; 106 107 const char *CudaLib = DYNAMIC_CUDA_PATH; 108 void *DynlibHandle = dlopen(CudaLib, RTLD_NOW); 109 if (!DynlibHandle) { 110 DP("Unable to load library '%s': %s!\n", CudaLib, dlerror()); 111 return false; 112 } 113 114 for (size_t I = 0; I < dlwrap::size(); I++) { 115 const char *Sym = dlwrap::symbol(I); 116 117 auto It = TryFirst.find(Sym); 118 if (It != TryFirst.end()) { 119 const char *First = It->second; 120 void *P = dlsym(DynlibHandle, First); 121 if (P) { 122 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P); 123 *dlwrap::pointer(I) = P; 124 continue; 125 } 126 } 127 128 void *P = dlsym(DynlibHandle, Sym); 129 if (P == nullptr) { 130 DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib); 131 return false; 132 } 133 DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); 134 135 *dlwrap::pointer(I) = P; 136 } 137 138 return true; 139 } 140 141 CUresult cuInit(unsigned X) { 142 // Note: Called exactly once from cuda rtl.cpp in a global constructor so 143 // does not need to handle being called repeatedly or concurrently 144 if (!checkForCUDA()) { 145 return CUDA_ERROR_INVALID_HANDLE; 146 } 147 return dlwrap_cuInit(X); 148 } 149