1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements a pass that serializes a gpu module into an HSACO blob
// and adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Dialect/GPU/Passes.h"
14 #include "mlir/IR/Location.h"
15 #include "mlir/IR/MLIRContext.h"
16 
17 #if MLIR_GPU_TO_HSACO_PASS_ENABLE
18 #include "mlir/ExecutionEngine/OptUtils.h"
19 #include "mlir/Pass/Pass.h"
20 #include "mlir/Support/FileUtilities.h"
21 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Export.h"
23 
24 #include "llvm/MC/MCAsmBackend.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCCodeEmitter.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/TargetRegistry.h"
34 
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/FileUtilities.h"
37 #include "llvm/Support/Program.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/TargetSelect.h"
40 #include "llvm/Support/WithColor.h"
41 
42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetOptions.h"
44 
45 #include "lld/Common/Driver.h"
46 
#include <cstdlib>
#include <mutex>
48 
49 using namespace mlir;
50 
51 namespace {
/// Pass that serializes a gpu module to an HSACO binary blob. It plugs into
/// gpu::SerializeToBlobPass by overriding the LLVM optimization and ISA
/// serialization hooks.
class SerializeToHsacoPass
    : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
public:
  /// Creates the pass. `triple`, `arch`, `features` and `optLevel` are used
  /// for the corresponding pass options when those options have not been set
  /// otherwise.
  SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features,
                       int optLevel);
  /// Copy constructor; forwards to the PassWrapper base.
  SerializeToHsacoPass(const SerializeToHsacoPass &other);
  /// Command-line argument under which the pass is registered.
  StringRef getArgument() const override { return "gpu-to-hsaco"; }
  /// One-line description of the pass.
  StringRef getDescription() const override {
    return "Lower GPU kernel function to HSACO binary annotations";
  }

protected:
  /// LLVM optimization level; optimizeLlvm() rejects values outside 0..3.
  Option<int> optLevel{
      *this, "opt-level",
      llvm::cl::desc("Optimization level for HSACO compilation"),
      llvm::cl::init(2)};

  /// Path to the ROCm installation, as given by the user.
  Option<std::string> rocmPath{*this, "rocm-path",
                               llvm::cl::desc("Path to ROCm install")};

  /// Adds LLVM optimization passes: validates `optLevel`, applies it to
  /// `targetMachine` and runs the optimizing transformer on `llvmModule`.
  LogicalResult optimizeLlvm(llvm::Module &llvmModule,
                             llvm::TargetMachine &targetMachine) override;

private:
  /// Registers the ROCDL translation in addition to the base-class dialects.
  void getDependentDialects(DialectRegistry &registry) const override;

  /// Serializes ROCDL to HSACO: assembles `isa` and links the resulting
  /// object into an HSA code object. Returns null on failure.
  std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) override;

  /// Assembles the textual ISA in `isa` into an object-file byte buffer.
  /// Returns null on failure.
  std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
  /// Links the object in `isaBinary` into an HSA code object using lld and
  /// returns its bytes. Returns null on failure.
  std::unique_ptr<std::vector<char>>
  createHsaco(const SmallVectorImpl<char> &isaBinary);

  /// Returns the path to the ROCm installation to use.
  std::string getRocmPath();
};
89 } // end namespace
90 
91 SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other)
92     : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {}
93 
94 /// Get a user-specified path to ROCm
95 // Tries, in order, the --rocm-path option, the ROCM_PATH environment variable
96 // and a compile-time default
97 std::string SerializeToHsacoPass::getRocmPath() {
98   if (rocmPath.getNumOccurrences() > 0)
99     return rocmPath.getValue();
100 
101   return __DEFAULT_ROCM_PATH__;
102 }
103 
104 // Sets the 'option' to 'value' unless it already has a value.
105 static void maybeSetOption(Pass::Option<std::string> &option,
106                            function_ref<std::string()> getValue) {
107   if (!option.hasValue())
108     option = getValue();
109 }
110 
111 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch,
112                                            StringRef features, int optLevel) {
113   maybeSetOption(this->triple, [&triple] { return triple.str(); });
114   maybeSetOption(this->chip, [&arch] { return arch.str(); });
115   maybeSetOption(this->features, [&features] { return features.str(); });
116   if (this->optLevel.getNumOccurrences() == 0)
117     this->optLevel.setValue(optLevel);
118 }
119 
120 void SerializeToHsacoPass::getDependentDialects(
121     DialectRegistry &registry) const {
122   registerROCDLDialectTranslation(registry);
123   gpu::SerializeToBlobPass::getDependentDialects(registry);
124 }
125 
126 LogicalResult
127 SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule,
128                                    llvm::TargetMachine &targetMachine) {
129   int optLevel = this->optLevel.getValue();
130   if (optLevel < 0 || optLevel > 3)
131     return getOperation().emitError()
132            << "Invalid HSA optimization level" << optLevel << "\n";
133 
134   targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel));
135 
136   auto transformer =
137       makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
138   auto error = transformer(&llvmModule);
139   if (error) {
140     InFlightDiagnostic mlirError = getOperation()->emitError();
141     llvm::handleAllErrors(
142         std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
143           mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n";
144         });
145     return mlirError;
146   }
147   return success();
148 }
149 
150 std::unique_ptr<SmallVectorImpl<char>>
151 SerializeToHsacoPass::assembleIsa(const std::string &isa) {
152   auto loc = getOperation().getLoc();
153 
154   SmallVector<char, 0> result;
155   llvm::raw_svector_ostream os(result);
156 
157   llvm::Triple triple(llvm::Triple::normalize(this->triple));
158   std::string error;
159   const llvm::Target *target =
160       llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
161   if (!target) {
162     emitError(loc, Twine("failed to lookup target: ") + error);
163     return {};
164   }
165 
166   llvm::SourceMgr srcMgr;
167   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
168                             llvm::SMLoc());
169 
170   const llvm::MCTargetOptions mcOptions;
171   std::unique_ptr<llvm::MCRegisterInfo> mri(
172       target->createMCRegInfo(this->triple));
173   std::unique_ptr<llvm::MCAsmInfo> mai(
174       target->createMCAsmInfo(*mri, this->triple, mcOptions));
175   mai->setRelaxELFRelocations(true);
176   std::unique_ptr<llvm::MCSubtargetInfo> sti(
177       target->createMCSubtargetInfo(this->triple, this->chip, this->features));
178 
179   llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
180                       &mcOptions);
181   std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
182       ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
183   ctx.setObjectFileInfo(mofi.get());
184 
185   SmallString<128> cwd;
186   if (!llvm::sys::fs::current_path(cwd))
187     ctx.setCompilationDir(cwd);
188 
189   std::unique_ptr<llvm::MCStreamer> mcStreamer;
190   std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
191 
192   llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
193   llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
194   mcStreamer.reset(target->createMCObjectStreamer(
195       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
196       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
197       *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
198       /*DWARFMustBeAtTheEnd*/ false));
199   mcStreamer->setUseAssemblerInfoForParsing(true);
200 
201   std::unique_ptr<llvm::MCAsmParser> parser(
202       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
203   std::unique_ptr<llvm::MCTargetAsmParser> tap(
204       target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
205 
206   if (!tap) {
207     emitError(loc, "assembler initialization error");
208     return {};
209   }
210 
211   parser->setTargetParser(*tap);
212   parser->Run(false);
213 
214   return std::make_unique<SmallVector<char, 0>>(std::move(result));
215 }
216 
217 std::unique_ptr<std::vector<char>>
218 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
219   auto loc = getOperation().getLoc();
220 
221   // Save the ISA binary to a temp file.
222   int tempIsaBinaryFd = -1;
223   SmallString<128> tempIsaBinaryFilename;
224   if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
225                                          tempIsaBinaryFilename)) {
226     emitError(loc, "temporary file for ISA binary creation error");
227     return {};
228   }
229   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
230   llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
231   tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
232   tempIsaBinaryOs.close();
233 
234   // Create a temp file for HSA code object.
235   int tempHsacoFD = -1;
236   SmallString<128> tempHsacoFilename;
237   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
238                                          tempHsacoFilename)) {
239     emitError(loc, "temporary file for HSA code object creation error");
240     return {};
241   }
242   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
243 
244   {
245     static std::mutex mutex;
246     const std::lock_guard<std::mutex> lock(mutex);
247     // Invoke lld. Expect a true return value from lld.
248     if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
249                          "-o", tempHsacoFilename.c_str()},
250                         /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
251       emitError(loc, "lld invocation error");
252       return {};
253     }
254   }
255 
256   // Load the HSA code object.
257   auto hsacoFile = openInputFile(tempHsacoFilename);
258   if (!hsacoFile) {
259     emitError(loc, "read HSA code object from temp file error");
260     return {};
261   }
262 
263   StringRef buffer = hsacoFile->getBuffer();
264   return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
265 }
266 
267 std::unique_ptr<std::vector<char>>
268 SerializeToHsacoPass::serializeISA(const std::string &isa) {
269   auto isaBinary = assembleIsa(isa);
270   if (!isaBinary)
271     return {};
272   return createHsaco(*isaBinary);
273 }
274 
275 // Register pass to serialize GPU kernel functions to a HSACO binary annotation.
276 void mlir::registerGpuSerializeToHsacoPass() {
277   PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
278       [] {
279         // Initialize LLVM AMDGPU backend.
280         LLVMInitializeAMDGPUAsmParser();
281         LLVMInitializeAMDGPUAsmPrinter();
282         LLVMInitializeAMDGPUTarget();
283         LLVMInitializeAMDGPUTargetInfo();
284         LLVMInitializeAMDGPUTargetMC();
285 
286         return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "",
287                                                       "", 2);
288       });
289 }
290 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
291 void mlir::registerGpuSerializeToHsacoPass() {}
292 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
293