1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass that serializes a gpu module into HSAco blob and
10 // adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Dialect/GPU/Passes.h"
14 #include "mlir/IR/Location.h"
15 #include "mlir/IR/MLIRContext.h"
16 
17 #if MLIR_GPU_TO_HSACO_PASS_ENABLE
18 #include "mlir/ExecutionEngine/OptUtils.h"
19 #include "mlir/Pass/Pass.h"
20 #include "mlir/Support/FileUtilities.h"
21 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Export.h"
23 
24 #include "llvm/MC/MCAsmBackend.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCCodeEmitter.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/FileUtilities.h"
36 #include "llvm/Support/LineIterator.h"
37 #include "llvm/Support/Program.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/TargetSelect.h"
40 #include "llvm/Support/WithColor.h"
41 
42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetOptions.h"
44 
45 #include "lld/Common/Driver.h"
46 
47 #include "hip/hip_version.h"
48 
49 #include <mutex>
50 
51 using namespace mlir;
52 
53 namespace {
54 class SerializeToHsacoPass
55     : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
56 public:
57   SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features,
58                        int optLevel);
59   SerializeToHsacoPass(const SerializeToHsacoPass &other);
60   StringRef getArgument() const override { return "gpu-to-hsaco"; }
61   StringRef getDescription() const override {
62     return "Lower GPU kernel function to HSACO binary annotations";
63   }
64 
65 protected:
66   Option<int> optLevel{
67       *this, "opt-level",
68       llvm::cl::desc("Optimization level for HSACO compilation"),
69       llvm::cl::init(2)};
70 
71   /// Adds LLVM optimization passes
72   LogicalResult optimizeLlvm(llvm::Module &llvmModule,
73                              llvm::TargetMachine &targetMachine) override;
74 
75 private:
76   void getDependentDialects(DialectRegistry &registry) const override;
77 
78   // Serializes ROCDL to HSACO.
79   std::unique_ptr<std::vector<char>>
80   serializeISA(const std::string &isa) override;
81 
82   std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
83   std::unique_ptr<std::vector<char>>
84   createHsaco(const SmallVectorImpl<char> &isaBinary);
85 };
86 } // namespace
87 
88 SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other)
89     : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {}
90 static std::string getDefaultChip() {
91   const char kDefaultChip[] = "gfx900";
92 
93   // Locate rocm_agent_enumerator.
94   const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
95   llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
96       kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
97   if (!rocmAgentEnumerator) {
98     llvm::WithColor::warning(llvm::errs())
99         << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__
100         << "/bin\n";
101     return kDefaultChip;
102   }
103 
104   // Prepare temp file to hold the outputs.
105   int tempFd = -1;
106   SmallString<128> tempFilename;
107   if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd,
108                                          tempFilename)) {
109     llvm::WithColor::warning(llvm::errs())
110         << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
111     return kDefaultChip;
112   }
113   llvm::FileRemover cleanup(tempFilename);
114 
115   // Invoke rocm_agent_enumerator.
116   std::string errorMessage;
117   SmallVector<StringRef, 2> args{"-t", "GPU"};
118   Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
119   int result =
120       llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
121                                 redirects, 0, 0, &errorMessage);
122   if (result) {
123     llvm::WithColor::warning(llvm::errs())
124         << kRocmAgentEnumerator << " invocation error: " << errorMessage
125         << "\n";
126     return kDefaultChip;
127   }
128 
129   // Load and parse the result.
130   auto gfxIsaList = openInputFile(tempFilename);
131   if (!gfxIsaList) {
132     llvm::WithColor::error(llvm::errs())
133         << "read ROCm agent list temp file error\n";
134     return kDefaultChip;
135   }
136   for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
137     // Skip the line with content "gfx000".
138     if (*lines == "gfx000")
139       continue;
140     // Use the first ISA version found.
141     return lines->str();
142   }
143 
144   return kDefaultChip;
145 }
146 
147 // Sets the 'option' to 'value' unless it already has a value.
148 static void maybeSetOption(Pass::Option<std::string> &option,
149                            function_ref<std::string()> getValue) {
150   if (!option.hasValue())
151     option = getValue();
152 }
153 
154 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch,
155                                            StringRef features, int optLevel) {
156   maybeSetOption(this->triple, [&triple] { return triple.str(); });
157   maybeSetOption(this->chip, [&arch] { return arch.str(); });
158   maybeSetOption(this->features, [&features] { return features.str(); });
159   if (this->optLevel.getNumOccurrences() == 0)
160     this->optLevel.setValue(optLevel);
161 }
162 
163 void SerializeToHsacoPass::getDependentDialects(
164     DialectRegistry &registry) const {
165   registerROCDLDialectTranslation(registry);
166   gpu::SerializeToBlobPass::getDependentDialects(registry);
167 }
168 
169 LogicalResult
170 SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule,
171                                    llvm::TargetMachine &targetMachine) {
172   int optLevel = this->optLevel.getValue();
173   if (optLevel < 0 || optLevel > 3)
174     return getOperation().emitError()
175            << "Invalid HSA optimization level" << optLevel << "\n";
176 
177   targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel));
178 
179   auto transformer =
180       makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
181   auto error = transformer(&llvmModule);
182   if (error) {
183     InFlightDiagnostic mlirError = getOperation()->emitError();
184     llvm::handleAllErrors(
185         std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
186           mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n";
187         });
188     return mlirError;
189   }
190   return success();
191 }
192 
193 std::unique_ptr<SmallVectorImpl<char>>
194 SerializeToHsacoPass::assembleIsa(const std::string &isa) {
195   auto loc = getOperation().getLoc();
196 
197   SmallVector<char, 0> result;
198   llvm::raw_svector_ostream os(result);
199 
200   llvm::Triple triple(llvm::Triple::normalize(this->triple));
201   std::string error;
202   const llvm::Target *target =
203       llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
204   if (!target) {
205     emitError(loc, Twine("failed to lookup target: ") + error);
206     return {};
207   }
208 
209   llvm::SourceMgr srcMgr;
210   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
211                             llvm::SMLoc());
212 
213   const llvm::MCTargetOptions mcOptions;
214   std::unique_ptr<llvm::MCRegisterInfo> mri(
215       target->createMCRegInfo(this->triple));
216   std::unique_ptr<llvm::MCAsmInfo> mai(
217       target->createMCAsmInfo(*mri, this->triple, mcOptions));
218   mai->setRelaxELFRelocations(true);
219   std::unique_ptr<llvm::MCSubtargetInfo> sti(
220       target->createMCSubtargetInfo(this->triple, this->chip, this->features));
221 
222   llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
223                       &mcOptions);
224   std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
225       ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
226   ctx.setObjectFileInfo(mofi.get());
227 
228   SmallString<128> cwd;
229   if (!llvm::sys::fs::current_path(cwd))
230     ctx.setCompilationDir(cwd);
231 
232   std::unique_ptr<llvm::MCStreamer> mcStreamer;
233   std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
234 
235   llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
236   llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
237   mcStreamer.reset(target->createMCObjectStreamer(
238       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
239       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
240       *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
241       /*DWARFMustBeAtTheEnd*/ false));
242   mcStreamer->setUseAssemblerInfoForParsing(true);
243 
244   std::unique_ptr<llvm::MCAsmParser> parser(
245       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
246   std::unique_ptr<llvm::MCTargetAsmParser> tap(
247       target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
248 
249   if (!tap) {
250     emitError(loc, "assembler initialization error");
251     return {};
252   }
253 
254   parser->setTargetParser(*tap);
255   parser->Run(false);
256 
257   return std::make_unique<SmallVector<char, 0>>(std::move(result));
258 }
259 
260 std::unique_ptr<std::vector<char>>
261 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
262   auto loc = getOperation().getLoc();
263 
264   // Save the ISA binary to a temp file.
265   int tempIsaBinaryFd = -1;
266   SmallString<128> tempIsaBinaryFilename;
267   if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
268                                          tempIsaBinaryFilename)) {
269     emitError(loc, "temporary file for ISA binary creation error");
270     return {};
271   }
272   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
273   llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
274   tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
275   tempIsaBinaryOs.close();
276 
277   // Create a temp file for HSA code object.
278   int tempHsacoFD = -1;
279   SmallString<128> tempHsacoFilename;
280   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
281                                          tempHsacoFilename)) {
282     emitError(loc, "temporary file for HSA code object creation error");
283     return {};
284   }
285   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
286 
287   {
288     static std::mutex mutex;
289     const std::lock_guard<std::mutex> lock(mutex);
290     // Invoke lld. Expect a true return value from lld.
291     if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
292                          "-o", tempHsacoFilename.c_str()},
293                         /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
294       emitError(loc, "lld invocation error");
295       return {};
296     }
297   }
298 
299   // Load the HSA code object.
300   auto hsacoFile = openInputFile(tempHsacoFilename);
301   if (!hsacoFile) {
302     emitError(loc, "read HSA code object from temp file error");
303     return {};
304   }
305 
306   StringRef buffer = hsacoFile->getBuffer();
307   return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
308 }
309 
310 std::unique_ptr<std::vector<char>>
311 SerializeToHsacoPass::serializeISA(const std::string &isa) {
312   auto isaBinary = assembleIsa(isa);
313   if (!isaBinary)
314     return {};
315   return createHsaco(*isaBinary);
316 }
317 
318 // Register pass to serialize GPU kernel functions to a HSACO binary annotation.
319 void mlir::registerGpuSerializeToHsacoPass() {
320   PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
321       [] {
322         // Initialize LLVM AMDGPU backend.
323         LLVMInitializeAMDGPUAsmParser();
324         LLVMInitializeAMDGPUAsmPrinter();
325         LLVMInitializeAMDGPUTarget();
326         LLVMInitializeAMDGPUTargetInfo();
327         LLVMInitializeAMDGPUTargetMC();
328 
329         return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "",
330                                                       "", 2);
331       });
332 }
333 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
334 void mlir::registerGpuSerializeToHsacoPass() {}
335 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
336