1 //===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass that serializes a gpu module into CUBIN blob and
10 // adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Dialect/GPU/Passes.h"
14 
15 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/TargetSelect.h"
20 
21 #include <cuda.h>
22 
23 using namespace mlir;
24 
25 static void emitCudaError(const llvm::Twine &expr, const char *buffer,
26                           CUresult result, Location loc) {
27   const char *error;
28   cuGetErrorString(result, &error);
29   emitError(loc, expr.concat(" failed with error code ")
30                      .concat(llvm::Twine{error})
31                      .concat("[")
32                      .concat(buffer)
33                      .concat("]"));
34 }
35 
// Evaluates the CUDA driver call `expr`; on a non-zero status, emits a
// diagnostic via emitCudaError and returns a default-constructed value from
// the enclosing function. The expansion site must have `jitErrorBuffer`
// (char array) and `loc` (Location) in scope.
#define RETURN_ON_CUDA_ERROR(expr)                                             \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitCudaError(#expr, jitErrorBuffer, status, loc);                       \
      return {};                                                               \
    }                                                                          \
  } while (false)
43 
namespace {
// Pass that serializes a gpu module into a CUBIN blob and adds that blob as a
// string attribute of the module (see the file header). The PTX-to-CUBIN step
// is implemented in serializeISA; everything else comes from the
// SerializeToBlobPass base class.
class SerializeToCubinPass
    : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)

  // Fills in CUDA defaults for the inherited triple/chip/features options
  // when the user has not set them explicitly.
  SerializeToCubinPass();

  StringRef getArgument() const override { return "gpu-to-cubin"; }
  StringRef getDescription() const override {
    return "Lower GPU kernel function to CUBIN binary annotations";
  }

private:
  // Registers the NVVM dialect translation in addition to the base class'
  // dependent dialects.
  void getDependentDialects(DialectRegistry &registry) const override;

  // Serializes PTX to CUBIN.
  std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) override;
};
} // namespace
65 
66 // Sets the 'option' to 'value' unless it already has a value.
67 static void maybeSetOption(Pass::Option<std::string> &option,
68                            const char *value) {
69   if (!option.hasValue())
70     option = value;
71 }
72 
73 SerializeToCubinPass::SerializeToCubinPass() {
74   maybeSetOption(this->triple, "nvptx64-nvidia-cuda");
75   maybeSetOption(this->chip, "sm_35");
76   maybeSetOption(this->features, "+ptx60");
77 }
78 
void SerializeToCubinPass::getDependentDialects(
    DialectRegistry &registry) const {
  // Register the NVVM-to-LLVM-IR translation interface on top of whatever the
  // base class needs; presumably required when the module is translated to
  // LLVM IR before PTX generation — confirm against SerializeToBlobPass.
  registerNVVMDialectTranslation(registry);
  gpu::SerializeToBlobPass::getDependentDialects(registry);
}
84 
85 std::unique_ptr<std::vector<char>>
86 SerializeToCubinPass::serializeISA(const std::string &isa) {
87   Location loc = getOperation().getLoc();
88   char jitErrorBuffer[4096] = {0};
89 
90   RETURN_ON_CUDA_ERROR(cuInit(0));
91 
92   // Linking requires a device context.
93   CUdevice device;
94   RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
95   CUcontext context;
96   RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
97   CUlinkState linkState;
98 
99   CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
100                                CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
101   void *jitOptionsVals[] = {jitErrorBuffer,
102                             reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
103 
104   RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
105                                     jitOptions,     /* jit options */
106                                     jitOptionsVals, /* jit option values */
107                                     &linkState));
108 
109   auto kernelName = getOperation().getName().str();
110   RETURN_ON_CUDA_ERROR(cuLinkAddData(
111       linkState, CUjitInputType::CU_JIT_INPUT_PTX,
112       const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
113       kernelName.c_str(), 0, /* number of jit options */
114       nullptr,               /* jit options */
115       nullptr                /* jit option values */
116       ));
117 
118   void *cubinData;
119   size_t cubinSize;
120   RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
121 
122   char *cubinAsChar = static_cast<char *>(cubinData);
123   auto result =
124       std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
125 
126   // This will also destroy the cubin data.
127   RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
128   RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
129 
130   return result;
131 }
132 
133 // Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
134 void mlir::registerGpuSerializeToCubinPass() {
135   PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
136       [] {
137         // Initialize LLVM NVPTX backend.
138         LLVMInitializeNVPTXTarget();
139         LLVMInitializeNVPTXTargetInfo();
140         LLVMInitializeNVPTXTargetMC();
141         LLVMInitializeNVPTXAsmPrinter();
142 
143         return std::make_unique<SerializeToCubinPass>();
144       });
145 }
146 #else  // MLIR_GPU_TO_CUBIN_PASS_ENABLE
// No-op stub so callers can link unconditionally when the pass is compiled
// out (MLIR_GPU_TO_CUBIN_PASS_ENABLE is not set).
void mlir::registerGpuSerializeToCubinPass() {}
148 #endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE
149