1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into CUBIN blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/GPU/Transforms/Passes.h" 15 16 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE 17 #include "mlir/Pass/Pass.h" 18 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" 19 #include "mlir/Target/LLVMIR/Export.h" 20 #include "llvm/Support/TargetSelect.h" 21 22 #include <cuda.h> 23 24 using namespace mlir; 25 26 static void emitCudaError(const llvm::Twine &expr, const char *buffer, 27 CUresult result, Location loc) { 28 const char *error; 29 cuGetErrorString(result, &error); 30 emitError(loc, expr.concat(" failed with error code ") 31 .concat(llvm::Twine{error}) 32 .concat("[") 33 .concat(buffer) 34 .concat("]")); 35 } 36 37 #define RETURN_ON_CUDA_ERROR(expr) \ 38 do { \ 39 if (auto status = (expr)) { \ 40 emitCudaError(#expr, jitErrorBuffer, status, loc); \ 41 return {}; \ 42 } \ 43 } while (false) 44 45 namespace { 46 class SerializeToCubinPass 47 : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> { 48 public: 49 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass) 50 51 SerializeToCubinPass(); 52 53 StringRef getArgument() const override { return "gpu-to-cubin"; } 54 StringRef getDescription() const override { 55 return "Lower GPU kernel function to CUBIN binary annotations"; 56 } 57 58 private: 59 void getDependentDialects(DialectRegistry ®istry) const override; 60 61 // Serializes PTX to CUBIN. 62 std::unique_ptr<std::vector<char>> 63 serializeISA(const std::string &isa) override; 64 }; 65 } // namespace 66 67 // Sets the 'option' to 'value' unless it already has a value. 68 static void maybeSetOption(Pass::Option<std::string> &option, 69 const char *value) { 70 if (!option.hasValue()) 71 option = value; 72 } 73 74 SerializeToCubinPass::SerializeToCubinPass() { 75 maybeSetOption(this->triple, "nvptx64-nvidia-cuda"); 76 maybeSetOption(this->chip, "sm_35"); 77 maybeSetOption(this->features, "+ptx60"); 78 } 79 80 void SerializeToCubinPass::getDependentDialects( 81 DialectRegistry ®istry) const { 82 registerNVVMDialectTranslation(registry); 83 gpu::SerializeToBlobPass::getDependentDialects(registry); 84 } 85 86 std::unique_ptr<std::vector<char>> 87 SerializeToCubinPass::serializeISA(const std::string &isa) { 88 Location loc = getOperation().getLoc(); 89 char jitErrorBuffer[4096] = {0}; 90 91 RETURN_ON_CUDA_ERROR(cuInit(0)); 92 93 // Linking requires a device context. 94 CUdevice device; 95 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); 96 CUcontext context; 97 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); 98 CUlinkState linkState; 99 100 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, 101 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; 102 void *jitOptionsVals[] = {jitErrorBuffer, 103 reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; 104 105 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ 106 jitOptions, /* jit options */ 107 jitOptionsVals, /* jit option values */ 108 &linkState)); 109 110 auto kernelName = getOperation().getName().str(); 111 RETURN_ON_CUDA_ERROR(cuLinkAddData( 112 linkState, CUjitInputType::CU_JIT_INPUT_PTX, 113 const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(), 114 kernelName.c_str(), 0, /* number of jit options */ 115 nullptr, /* jit options */ 116 nullptr /* jit option values */ 117 )); 118 119 void *cubinData; 120 size_t cubinSize; 121 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); 122 123 char *cubinAsChar = static_cast<char *>(cubinData); 124 auto result = 125 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); 126 127 // This will also destroy the cubin data. 128 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); 129 RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); 130 131 return result; 132 } 133 134 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation. 135 void mlir::registerGpuSerializeToCubinPass() { 136 PassRegistration<SerializeToCubinPass> registerSerializeToCubin( 137 [] { 138 // Initialize LLVM NVPTX backend. 139 LLVMInitializeNVPTXTarget(); 140 LLVMInitializeNVPTXTargetInfo(); 141 LLVMInitializeNVPTXTargetMC(); 142 LLVMInitializeNVPTXAsmPrinter(); 143 144 return std::make_unique<SerializeToCubinPass>(); 145 }); 146 } 147 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE 148 void mlir::registerGpuSerializeToCubinPass() {} 149 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE 150