1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into CUBIN blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 15 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" 18 #include "mlir/Target/LLVMIR/Export.h" 19 #include "llvm/Support/TargetSelect.h" 20 21 #include <cuda.h> 22 23 using namespace mlir; 24 25 static void emitCudaError(const llvm::Twine &expr, const char *buffer, 26 CUresult result, Location loc) { 27 const char *error; 28 cuGetErrorString(result, &error); 29 emitError(loc, expr.concat(" failed with error code ") 30 .concat(llvm::Twine{error}) 31 .concat("[") 32 .concat(buffer) 33 .concat("]")); 34 } 35 36 #define RETURN_ON_CUDA_ERROR(expr) \ 37 do { \ 38 if (auto status = (expr)) { \ 39 emitCudaError(#expr, jitErrorBuffer, status, loc); \ 40 return {}; \ 41 } \ 42 } while (false) 43 44 namespace { 45 class SerializeToCubinPass 46 : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> { 47 public: 48 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass) 49 50 SerializeToCubinPass(); 51 52 StringRef getArgument() const override { return "gpu-to-cubin"; } 53 StringRef getDescription() const override { 54 return "Lower GPU kernel function to CUBIN binary annotations"; 55 } 56 57 private: 58 void getDependentDialects(DialectRegistry ®istry) const override; 59 60 // Serializes PTX to CUBIN. 61 std::unique_ptr<std::vector<char>> 62 serializeISA(const std::string &isa) override; 63 }; 64 } // namespace 65 66 // Sets the 'option' to 'value' unless it already has a value. 67 static void maybeSetOption(Pass::Option<std::string> &option, 68 const char *value) { 69 if (!option.hasValue()) 70 option = value; 71 } 72 73 SerializeToCubinPass::SerializeToCubinPass() { 74 maybeSetOption(this->triple, "nvptx64-nvidia-cuda"); 75 maybeSetOption(this->chip, "sm_35"); 76 maybeSetOption(this->features, "+ptx60"); 77 } 78 79 void SerializeToCubinPass::getDependentDialects( 80 DialectRegistry ®istry) const { 81 registerNVVMDialectTranslation(registry); 82 gpu::SerializeToBlobPass::getDependentDialects(registry); 83 } 84 85 std::unique_ptr<std::vector<char>> 86 SerializeToCubinPass::serializeISA(const std::string &isa) { 87 Location loc = getOperation().getLoc(); 88 char jitErrorBuffer[4096] = {0}; 89 90 RETURN_ON_CUDA_ERROR(cuInit(0)); 91 92 // Linking requires a device context. 93 CUdevice device; 94 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); 95 CUcontext context; 96 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); 97 CUlinkState linkState; 98 99 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, 100 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; 101 void *jitOptionsVals[] = {jitErrorBuffer, 102 reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; 103 104 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ 105 jitOptions, /* jit options */ 106 jitOptionsVals, /* jit option values */ 107 &linkState)); 108 109 auto kernelName = getOperation().getName().str(); 110 RETURN_ON_CUDA_ERROR(cuLinkAddData( 111 linkState, CUjitInputType::CU_JIT_INPUT_PTX, 112 const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(), 113 kernelName.c_str(), 0, /* number of jit options */ 114 nullptr, /* jit options */ 115 nullptr /* jit option values */ 116 )); 117 118 void *cubinData; 119 size_t cubinSize; 120 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); 121 122 char *cubinAsChar = static_cast<char *>(cubinData); 123 auto result = 124 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); 125 126 // This will also destroy the cubin data. 127 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); 128 RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); 129 130 return result; 131 } 132 133 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation. 134 void mlir::registerGpuSerializeToCubinPass() { 135 PassRegistration<SerializeToCubinPass> registerSerializeToCubin( 136 [] { 137 // Initialize LLVM NVPTX backend. 138 LLVMInitializeNVPTXTarget(); 139 LLVMInitializeNVPTXTargetInfo(); 140 LLVMInitializeNVPTXTargetMC(); 141 LLVMInitializeNVPTXAsmPrinter(); 142 143 return std::make_unique<SerializeToCubinPass>(); 144 }); 145 } 146 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE 147 void mlir::registerGpuSerializeToCubinPass() {} 148 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE 149