1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass that serializes a gpu module into HSAco blob and
10 // adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Dialect/GPU/Passes.h"
14 
15 #if MLIR_GPU_TO_HSACO_PASS_ENABLE
16 #include "mlir/Pass/Pass.h"
17 #include "mlir/Support/FileUtilities.h"
18 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
19 #include "mlir/Target/LLVMIR/Export.h"
20 
21 #include "llvm/MC/MCAsmBackend.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCCodeEmitter.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCObjectFileInfo.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/FileUtilities.h"
33 #include "llvm/Support/LineIterator.h"
34 #include "llvm/Support/Program.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/WithColor.h"
37 #include "llvm/Target/TargetOptions.h"
38 
39 #include "lld/Common/Driver.h"
40 
41 #include "hip/hip_version.h"
42 
43 #include <mutex>
44 
45 using namespace mlir;
46 
47 namespace {
48 class SerializeToHsacoPass
49     : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
50 public:
51   SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features);
52   StringRef getArgument() const override { return "gpu-to-hsaco"; }
53   StringRef getDescription() const override {
54     return "Lower GPU kernel function to HSACO binary annotations";
55   }
56 
57 private:
58   void getDependentDialects(DialectRegistry &registry) const override;
59 
60   // Serializes ROCDL to HSACO.
61   std::unique_ptr<std::vector<char>>
62   serializeISA(const std::string &isa) override;
63 
64   std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
65   std::unique_ptr<std::vector<char>>
66   createHsaco(const SmallVectorImpl<char> &isaBinary);
67 };
68 } // namespace
69 
70 static std::string getDefaultChip() {
71   const char kDefaultChip[] = "gfx900";
72 
73   // Locate rocm_agent_enumerator.
74   const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
75   llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
76       kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
77   if (!rocmAgentEnumerator) {
78     llvm::WithColor::warning(llvm::errs())
79         << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__
80         << "/bin\n";
81     return kDefaultChip;
82   }
83 
84   // Prepare temp file to hold the outputs.
85   int tempFd = -1;
86   SmallString<128> tempFilename;
87   if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd,
88                                          tempFilename)) {
89     llvm::WithColor::warning(llvm::errs())
90         << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
91     return kDefaultChip;
92   }
93   llvm::FileRemover cleanup(tempFilename);
94 
95   // Invoke rocm_agent_enumerator.
96   std::string errorMessage;
97   SmallVector<StringRef, 2> args{"-t", "GPU"};
98   Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
99   int result =
100       llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
101                                 redirects, 0, 0, &errorMessage);
102   if (result) {
103     llvm::WithColor::warning(llvm::errs())
104         << kRocmAgentEnumerator << " invocation error: " << errorMessage
105         << "\n";
106     return kDefaultChip;
107   }
108 
109   // Load and parse the result.
110   auto gfxIsaList = openInputFile(tempFilename);
111   if (!gfxIsaList) {
112     llvm::WithColor::error(llvm::errs())
113         << "read ROCm agent list temp file error\n";
114     return kDefaultChip;
115   }
116   for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
117     // Skip the line with content "gfx000".
118     if (*lines == "gfx000")
119       continue;
120     // Use the first ISA version found.
121     return lines->str();
122   }
123 
124   return kDefaultChip;
125 }
126 
127 // Sets the 'option' to 'value' unless it already has a value.
128 static void maybeSetOption(Pass::Option<std::string> &option,
129                            function_ref<std::string()> getValue) {
130   if (!option.hasValue())
131     option = getValue();
132 }
133 
134 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch,
135                                            StringRef features) {
136   maybeSetOption(this->triple, [&triple] { return triple.str(); });
137   maybeSetOption(this->chip, [&arch] { return arch.str(); });
138   maybeSetOption(this->features, [&features] { return features.str(); });
139 }
140 
141 void SerializeToHsacoPass::getDependentDialects(
142     DialectRegistry &registry) const {
143   registerROCDLDialectTranslation(registry);
144   gpu::SerializeToBlobPass::getDependentDialects(registry);
145 }
146 
147 std::unique_ptr<SmallVectorImpl<char>>
148 SerializeToHsacoPass::assembleIsa(const std::string &isa) {
149   auto loc = getOperation().getLoc();
150 
151   SmallVector<char, 0> result;
152   llvm::raw_svector_ostream os(result);
153 
154   llvm::Triple triple(llvm::Triple::normalize(this->triple));
155   std::string error;
156   const llvm::Target *target =
157       llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
158   if (!target) {
159     emitError(loc, Twine("failed to lookup target: ") + error);
160     return {};
161   }
162 
163   llvm::SourceMgr srcMgr;
164   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
165                             llvm::SMLoc());
166 
167   const llvm::MCTargetOptions mcOptions;
168   std::unique_ptr<llvm::MCRegisterInfo> mri(
169       target->createMCRegInfo(this->triple));
170   std::unique_ptr<llvm::MCAsmInfo> mai(
171       target->createMCAsmInfo(*mri, this->triple, mcOptions));
172   mai->setRelaxELFRelocations(true);
173 
174   llvm::MCContext ctx(triple, mai.get(), mri.get(), &srcMgr, &mcOptions);
175   std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
176       ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
177   ctx.setObjectFileInfo(mofi.get());
178 
179   SmallString<128> cwd;
180   if (!llvm::sys::fs::current_path(cwd))
181     ctx.setCompilationDir(cwd);
182 
183   std::unique_ptr<llvm::MCStreamer> mcStreamer;
184   std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
185   std::unique_ptr<llvm::MCSubtargetInfo> sti(
186       target->createMCSubtargetInfo(this->triple, this->chip, this->features));
187 
188   llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
189   llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
190   mcStreamer.reset(target->createMCObjectStreamer(
191       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
192       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
193       *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
194       /*DWARFMustBeAtTheEnd*/ false));
195   mcStreamer->setUseAssemblerInfoForParsing(true);
196 
197   std::unique_ptr<llvm::MCAsmParser> parser(
198       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
199   std::unique_ptr<llvm::MCTargetAsmParser> tap(
200       target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
201 
202   if (!tap) {
203     emitError(loc, "assembler initialization error");
204     return {};
205   }
206 
207   parser->setTargetParser(*tap);
208   parser->Run(false);
209 
210   return std::make_unique<SmallVector<char, 0>>(std::move(result));
211 }
212 
213 std::unique_ptr<std::vector<char>>
214 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
215   auto loc = getOperation().getLoc();
216 
217   // Save the ISA binary to a temp file.
218   int tempIsaBinaryFd = -1;
219   SmallString<128> tempIsaBinaryFilename;
220   if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
221                                          tempIsaBinaryFilename)) {
222     emitError(loc, "temporary file for ISA binary creation error");
223     return {};
224   }
225   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
226   llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
227   tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
228   tempIsaBinaryOs.close();
229 
230   // Create a temp file for HSA code object.
231   int tempHsacoFD = -1;
232   SmallString<128> tempHsacoFilename;
233   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
234                                          tempHsacoFilename)) {
235     emitError(loc, "temporary file for HSA code object creation error");
236     return {};
237   }
238   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
239 
240   {
241     static std::mutex mutex;
242     const std::lock_guard<std::mutex> lock(mutex);
243     // Invoke lld. Expect a true return value from lld.
244     if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
245                          "-o", tempHsacoFilename.c_str()},
246                         /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
247       emitError(loc, "lld invocation error");
248       return {};
249     }
250   }
251 
252   // Load the HSA code object.
253   auto hsacoFile = openInputFile(tempHsacoFilename);
254   if (!hsacoFile) {
255     emitError(loc, "read HSA code object from temp file error");
256     return {};
257   }
258 
259   StringRef buffer = hsacoFile->getBuffer();
260   return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
261 }
262 
263 std::unique_ptr<std::vector<char>>
264 SerializeToHsacoPass::serializeISA(const std::string &isa) {
265   auto isaBinary = assembleIsa(isa);
266   if (!isaBinary)
267     return {};
268   return createHsaco(*isaBinary);
269 }
270 
271 // Register pass to serialize GPU kernel functions to a HSACO binary annotation.
272 void mlir::registerGpuSerializeToHsacoPass() {
273   PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
274       [] {
275         // Initialize LLVM AMDGPU backend.
276         LLVMInitializeAMDGPUAsmParser();
277         LLVMInitializeAMDGPUAsmPrinter();
278         LLVMInitializeAMDGPUTarget();
279         LLVMInitializeAMDGPUTargetInfo();
280         LLVMInitializeAMDGPUTargetMC();
281 
282         return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "",
283                                                       "");
284       });
285 }
286 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
// Stub used when MLIR_GPU_TO_HSACO_PASS_ENABLE is unset or zero: the HSACO
// serialization pass is not compiled in, so there is nothing to register.
void mlir::registerGpuSerializeToHsacoPass() {}
288 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
289