1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass that serializes a gpu module into CUBIN blob and
10 // adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13
#include "mlir/Dialect/GPU/Transforms/Passes.h"

#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/TargetSelect.h"

#include <cuda.h>
23
24 using namespace mlir;
25
emitCudaError(const llvm::Twine & expr,const char * buffer,CUresult result,Location loc)26 static void emitCudaError(const llvm::Twine &expr, const char *buffer,
27 CUresult result, Location loc) {
28 const char *error;
29 cuGetErrorString(result, &error);
30 emitError(loc, expr.concat(" failed with error code ")
31 .concat(llvm::Twine{error})
32 .concat("[")
33 .concat(buffer)
34 .concat("]"));
35 }
36
// Evaluates `expr` (a CUDA driver call returning CUresult); on a non-success
// status, emits a diagnostic and returns an empty value from the enclosing
// function. NOTE: relies on `jitErrorBuffer` and `loc` being in scope at the
// expansion site (see serializeISA below).
#define RETURN_ON_CUDA_ERROR(expr)                                             \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitCudaError(#expr, jitErrorBuffer, status, loc);                       \
      return {};                                                               \
    }                                                                          \
  } while (false)
44
namespace {
// Pass that turns the ISA (PTX) produced by the gpu::SerializeToBlobPass
// pipeline into a CUBIN binary using the CUDA driver's JIT linker, so the
// blob can be attached to the module as an annotation.
class SerializeToCubinPass
    : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)

  // Fills in default target options (triple/chip/features) for options the
  // user did not set explicitly.
  SerializeToCubinPass();

  StringRef getArgument() const override { return "gpu-to-cubin"; }
  StringRef getDescription() const override {
    return "Lower GPU kernel function to CUBIN binary annotations";
  }

private:
  // Additionally registers the NVVM-to-LLVM-IR translation interface needed
  // to produce the intermediate LLVM module.
  void getDependentDialects(DialectRegistry &registry) const override;

  // Serializes PTX to CUBIN.
  std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) override;
};
} // namespace
66
67 // Sets the 'option' to 'value' unless it already has a value.
maybeSetOption(Pass::Option<std::string> & option,const char * value)68 static void maybeSetOption(Pass::Option<std::string> &option,
69 const char *value) {
70 if (!option.hasValue())
71 option = value;
72 }
73
SerializeToCubinPass()74 SerializeToCubinPass::SerializeToCubinPass() {
75 maybeSetOption(this->triple, "nvptx64-nvidia-cuda");
76 maybeSetOption(this->chip, "sm_35");
77 maybeSetOption(this->features, "+ptx60");
78 }
79
// Registers the NVVM dialect's LLVM-IR translation on top of whatever the
// base serialize-to-blob pass already requires; both feed into the same
// registry, so the base hook is still invoked.
void SerializeToCubinPass::getDependentDialects(
    DialectRegistry &registry) const {
  registerNVVMDialectTranslation(registry);
  gpu::SerializeToBlobPass::getDependentDialects(registry);
}
85
86 std::unique_ptr<std::vector<char>>
serializeISA(const std::string & isa)87 SerializeToCubinPass::serializeISA(const std::string &isa) {
88 Location loc = getOperation().getLoc();
89 char jitErrorBuffer[4096] = {0};
90
91 RETURN_ON_CUDA_ERROR(cuInit(0));
92
93 // Linking requires a device context.
94 CUdevice device;
95 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
96 CUcontext context;
97 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
98 CUlinkState linkState;
99
100 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
101 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
102 void *jitOptionsVals[] = {jitErrorBuffer,
103 reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
104
105 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
106 jitOptions, /* jit options */
107 jitOptionsVals, /* jit option values */
108 &linkState));
109
110 auto kernelName = getOperation().getName().str();
111 RETURN_ON_CUDA_ERROR(cuLinkAddData(
112 linkState, CUjitInputType::CU_JIT_INPUT_PTX,
113 const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
114 kernelName.c_str(), 0, /* number of jit options */
115 nullptr, /* jit options */
116 nullptr /* jit option values */
117 ));
118
119 void *cubinData;
120 size_t cubinSize;
121 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
122
123 char *cubinAsChar = static_cast<char *>(cubinData);
124 auto result =
125 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
126
127 // This will also destroy the cubin data.
128 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
129 RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
130
131 return result;
132 }
133
134 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
registerGpuSerializeToCubinPass()135 void mlir::registerGpuSerializeToCubinPass() {
136 PassRegistration<SerializeToCubinPass> registerSerializeToCubin([] {
137 // Initialize LLVM NVPTX backend.
138 LLVMInitializeNVPTXTarget();
139 LLVMInitializeNVPTXTargetInfo();
140 LLVMInitializeNVPTXTargetMC();
141 LLVMInitializeNVPTXAsmPrinter();
142
143 return std::make_unique<SerializeToCubinPass>();
144 });
145 }
146 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
registerGpuSerializeToCubinPass()147 void mlir::registerGpuSerializeToCubinPass() {}
148 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE
149