1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into CUBIN blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 15 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" 18 #include "mlir/Target/LLVMIR/Export.h" 19 #include "llvm/Support/TargetSelect.h" 20 21 #include <cuda.h> 22 23 using namespace mlir; 24 25 static void emitCudaError(const llvm::Twine &expr, const char *buffer, 26 CUresult result, Location loc) { 27 const char *error; 28 cuGetErrorString(result, &error); 29 emitError(loc, expr.concat(" failed with error code ") 30 .concat(llvm::Twine{error}) 31 .concat("[") 32 .concat(buffer) 33 .concat("]")); 34 } 35 36 #define RETURN_ON_CUDA_ERROR(expr) \ 37 do { \ 38 if (auto status = (expr)) { \ 39 emitCudaError(#expr, jitErrorBuffer, status, loc); \ 40 return {}; \ 41 } \ 42 } while (false) 43 44 namespace { 45 class SerializeToCubinPass 46 : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> { 47 public: 48 SerializeToCubinPass(); 49 50 StringRef getArgument() const override { return "gpu-to-cubin"; } 51 StringRef getDescription() const override { 52 return "Lower GPU kernel function to CUBIN binary annotations"; 53 } 54 55 private: 56 void getDependentDialects(DialectRegistry ®istry) const override; 57 58 // Serializes PTX to CUBIN. 59 std::unique_ptr<std::vector<char>> 60 serializeISA(const std::string &isa) override; 61 }; 62 } // namespace 63 64 // Sets the 'option' to 'value' unless it already has a value. 65 static void maybeSetOption(Pass::Option<std::string> &option, 66 const char *value) { 67 if (!option.hasValue()) 68 option = value; 69 } 70 71 SerializeToCubinPass::SerializeToCubinPass() { 72 maybeSetOption(this->triple, "nvptx64-nvidia-cuda"); 73 maybeSetOption(this->chip, "sm_35"); 74 maybeSetOption(this->features, "+ptx60"); 75 } 76 77 void SerializeToCubinPass::getDependentDialects( 78 DialectRegistry ®istry) const { 79 registerNVVMDialectTranslation(registry); 80 gpu::SerializeToBlobPass::getDependentDialects(registry); 81 } 82 83 std::unique_ptr<std::vector<char>> 84 SerializeToCubinPass::serializeISA(const std::string &isa) { 85 Location loc = getOperation().getLoc(); 86 char jitErrorBuffer[4096] = {0}; 87 88 RETURN_ON_CUDA_ERROR(cuInit(0)); 89 90 // Linking requires a device context. 91 CUdevice device; 92 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); 93 CUcontext context; 94 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); 95 CUlinkState linkState; 96 97 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, 98 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; 99 void *jitOptionsVals[] = {jitErrorBuffer, 100 reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; 101 102 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ 103 jitOptions, /* jit options */ 104 jitOptionsVals, /* jit option values */ 105 &linkState)); 106 107 auto kernelName = getOperation().getName().str(); 108 RETURN_ON_CUDA_ERROR(cuLinkAddData( 109 linkState, CUjitInputType::CU_JIT_INPUT_PTX, 110 const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(), 111 kernelName.c_str(), 0, /* number of jit options */ 112 nullptr, /* jit options */ 113 nullptr /* jit option values */ 114 )); 115 116 void *cubinData; 117 size_t cubinSize; 118 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); 119 120 char *cubinAsChar = static_cast<char *>(cubinData); 121 auto result = 122 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); 123 124 // This will also destroy the cubin data. 125 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); 126 RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); 127 128 return result; 129 } 130 131 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation. 132 void mlir::registerGpuSerializeToCubinPass() { 133 PassRegistration<SerializeToCubinPass> registerSerializeToCubin( 134 [] { 135 // Initialize LLVM NVPTX backend. 136 LLVMInitializeNVPTXTarget(); 137 LLVMInitializeNVPTXTargetInfo(); 138 LLVMInitializeNVPTXTargetMC(); 139 LLVMInitializeNVPTXAsmPrinter(); 140 141 return std::make_unique<SerializeToCubinPass>(); 142 }); 143 } 144 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE 145 void mlir::registerGpuSerializeToCubinPass() {} 146 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE 147