1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into CUBIN blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 15 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" 18 #include "mlir/Target/LLVMIR/Export.h" 19 #include "llvm/Support/TargetSelect.h" 20 21 #include <cuda.h> 22 23 using namespace mlir; 24 25 static void emitCudaError(const llvm::Twine &expr, const char *buffer, 26 CUresult result, Location loc) { 27 const char *error; 28 cuGetErrorString(result, &error); 29 emitError(loc, expr.concat(" failed with error code ") 30 .concat(llvm::Twine{error}) 31 .concat("[") 32 .concat(buffer) 33 .concat("]")); 34 } 35 36 #define RETURN_ON_CUDA_ERROR(expr) \ 37 do { \ 38 if (auto status = (expr)) { \ 39 emitCudaError(#expr, jitErrorBuffer, status, loc); \ 40 return {}; \ 41 } \ 42 } while (false) 43 44 namespace { 45 class SerializeToCubinPass 46 : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> { 47 public: 48 SerializeToCubinPass(); 49 50 private: 51 void getDependentDialects(DialectRegistry ®istry) const override; 52 53 // Serializes PTX to CUBIN. 54 std::unique_ptr<std::vector<char>> 55 serializeISA(const std::string &isa) override; 56 }; 57 } // namespace 58 59 // Sets the 'option' to 'value' unless it already has a value. 60 static void maybeSetOption(Pass::Option<std::string> &option, 61 const char *value) { 62 if (!option.hasValue()) 63 option = value; 64 } 65 66 SerializeToCubinPass::SerializeToCubinPass() { 67 maybeSetOption(this->triple, "nvptx64-nvidia-cuda"); 68 maybeSetOption(this->chip, "sm_35"); 69 maybeSetOption(this->features, "+ptx60"); 70 } 71 72 void SerializeToCubinPass::getDependentDialects( 73 DialectRegistry ®istry) const { 74 registerNVVMDialectTranslation(registry); 75 gpu::SerializeToBlobPass::getDependentDialects(registry); 76 } 77 78 std::unique_ptr<std::vector<char>> 79 SerializeToCubinPass::serializeISA(const std::string &isa) { 80 Location loc = getOperation().getLoc(); 81 char jitErrorBuffer[4096] = {0}; 82 83 RETURN_ON_CUDA_ERROR(cuInit(0)); 84 85 // Linking requires a device context. 86 CUdevice device; 87 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); 88 CUcontext context; 89 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); 90 CUlinkState linkState; 91 92 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, 93 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; 94 void *jitOptionsVals[] = {jitErrorBuffer, 95 reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; 96 97 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ 98 jitOptions, /* jit options */ 99 jitOptionsVals, /* jit option values */ 100 &linkState)); 101 102 auto kernelName = getOperation().getName().str(); 103 RETURN_ON_CUDA_ERROR(cuLinkAddData( 104 linkState, CUjitInputType::CU_JIT_INPUT_PTX, 105 const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(), 106 kernelName.c_str(), 0, /* number of jit options */ 107 nullptr, /* jit options */ 108 nullptr /* jit option values */ 109 )); 110 111 void *cubinData; 112 size_t cubinSize; 113 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); 114 115 char *cubinAsChar = static_cast<char *>(cubinData); 116 auto result = 117 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); 118 119 // This will also destroy the cubin data. 120 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); 121 RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); 122 123 return result; 124 } 125 126 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation. 127 void mlir::registerGpuSerializeToCubinPass() { 128 PassRegistration<SerializeToCubinPass> registerSerializeToCubin( 129 "gpu-to-cubin", "Lower GPU kernel function to CUBIN binary annotations", 130 [] { 131 // Initialize LLVM NVPTX backend. 132 LLVMInitializeNVPTXTarget(); 133 LLVMInitializeNVPTXTargetInfo(); 134 LLVMInitializeNVPTXTargetMC(); 135 LLVMInitializeNVPTXAsmPrinter(); 136 137 return std::make_unique<SerializeToCubinPass>(); 138 }); 139 } 140 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE 141 void mlir::registerGpuSerializeToCubinPass() {} 142 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE 143