1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements a pass that serializes a GPU module into an HSACO blob
// and adds that blob as a string attribute of the module.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "mlir/Dialect/GPU/Passes.h"
14 
15 #if MLIR_GPU_TO_HSACO_PASS_ENABLE
16 #include "mlir/Pass/Pass.h"
17 #include "mlir/Support/FileUtilities.h"
18 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
19 #include "mlir/Target/LLVMIR/Export.h"
20 
21 #include "llvm/MC/MCAsmBackend.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCCodeEmitter.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCObjectFileInfo.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 
31 #include "llvm/Support/FileUtilities.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/Program.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/WithColor.h"
37 #include "llvm/Target/TargetOptions.h"
38 
39 #include "lld/Common/Driver.h"
40 
41 #include "hip/hip_version.h"
42 
43 #include <mutex>
44 
45 using namespace mlir;
46 
47 namespace {
/// Pass that turns the ISA produced by the base `gpu::SerializeToBlobPass`
/// into an HSACO binary. The ISA text is first assembled into an object and
/// that object is then linked with lld.
class SerializeToHsacoPass
    : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
public:
  /// Gives the `triple` and `chip` options a default value when they do not
  /// already have one.
  SerializeToHsacoPass();

private:
  /// Registers the ROCDL dialect translation in addition to whatever the base
  /// pass registers.
  void getDependentDialects(DialectRegistry &registry) const override;

  /// Serializes ROCDL to HSACO: assembles `isa` and links the result. Returns
  /// null if either step fails.
  std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) override;

  /// Assembles the textual ISA in `isa` into an in-memory object. Returns null
  /// after emitting an error on failure.
  std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);

  /// Links the assembled object `isaBinary` into an HSA code object and
  /// returns its bytes. Returns null after emitting an error on failure.
  std::unique_ptr<std::vector<char>>
  createHsaco(const SmallVectorImpl<char> &isaBinary);
};
64 } // namespace
65 
66 static std::string getDefaultChip() {
67   const char kDefaultChip[] = "gfx900";
68 
69   // Locate rocm_agent_enumerator.
70   const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
71   llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
72       kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
73   if (!rocmAgentEnumerator) {
74     llvm::WithColor::warning(llvm::errs())
75         << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__
76         << "/bin\n";
77     return kDefaultChip;
78   }
79 
80   // Prepare temp file to hold the outputs.
81   int tempFd = -1;
82   SmallString<128> tempFilename;
83   if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd,
84                                          tempFilename)) {
85     llvm::WithColor::warning(llvm::errs())
86         << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
87     return kDefaultChip;
88   }
89   llvm::FileRemover cleanup(tempFilename);
90 
91   // Invoke rocm_agent_enumerator.
92   std::string errorMessage;
93   SmallVector<StringRef, 2> args{"-t", "GPU"};
94   Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
95   int result =
96       llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
97                                 redirects, 0, 0, &errorMessage);
98   if (result) {
99     llvm::WithColor::warning(llvm::errs())
100         << kRocmAgentEnumerator << " invocation error: " << errorMessage
101         << "\n";
102     return kDefaultChip;
103   }
104 
105   // Load and parse the result.
106   auto gfxIsaList = openInputFile(tempFilename);
107   if (!gfxIsaList) {
108     llvm::WithColor::error(llvm::errs())
109         << "read ROCm agent list temp file error\n";
110     return kDefaultChip;
111   }
112   for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
113     // Skip the line with content "gfx000".
114     if (*lines == "gfx000")
115       continue;
116     // Use the first ISA version found.
117     return lines->str();
118   }
119 
120   return kDefaultChip;
121 }
122 
123 // Sets the 'option' to 'value' unless it already has a value.
124 static void maybeSetOption(Pass::Option<std::string> &option,
125                            function_ref<std::string()> getValue) {
126   if (!option.hasValue())
127     option = getValue();
128 }
129 
130 SerializeToHsacoPass::SerializeToHsacoPass() {
131   maybeSetOption(this->triple, [] { return "amdgcn-amd-amdhsa"; });
132   maybeSetOption(this->chip, [] {
133     static auto chip = getDefaultChip();
134     return chip;
135   });
136 }
137 
// Declares the dialects this pass depends on: the ROCDL dialect translation is
// registered first, then the base pass adds its own dependencies.
void SerializeToHsacoPass::getDependentDialects(
    DialectRegistry &registry) const {
  // ROCDL-specific registration needed by this pass.
  registerROCDLDialectTranslation(registry);
  // Everything the generic serialize-to-blob pass requires.
  gpu::SerializeToBlobPass::getDependentDialects(registry);
}
143 
144 std::unique_ptr<SmallVectorImpl<char>>
145 SerializeToHsacoPass::assembleIsa(const std::string &isa) {
146   auto loc = getOperation().getLoc();
147 
148   SmallVector<char, 0> result;
149   llvm::raw_svector_ostream os(result);
150 
151   llvm::Triple triple(llvm::Triple::normalize(this->triple));
152   std::string error;
153   const llvm::Target *target =
154       llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
155   if (!target) {
156     emitError(loc, Twine("failed to lookup target: ") + error);
157     return {};
158   }
159 
160   llvm::SourceMgr srcMgr;
161   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
162                             llvm::SMLoc());
163 
164   const llvm::MCTargetOptions mcOptions;
165   std::unique_ptr<llvm::MCRegisterInfo> mri(
166       target->createMCRegInfo(this->triple));
167   std::unique_ptr<llvm::MCAsmInfo> mai(
168       target->createMCAsmInfo(*mri, this->triple, mcOptions));
169   mai->setRelaxELFRelocations(true);
170 
171   llvm::MCObjectFileInfo mofi;
172   llvm::MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
173   mofi.InitMCObjectFileInfo(triple, false, ctx, false);
174 
175   SmallString<128> cwd;
176   if (!llvm::sys::fs::current_path(cwd))
177     ctx.setCompilationDir(cwd);
178 
179   std::unique_ptr<llvm::MCStreamer> mcStreamer;
180   std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
181   std::unique_ptr<llvm::MCSubtargetInfo> sti(
182       target->createMCSubtargetInfo(this->triple, this->chip, this->features));
183 
184   llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
185   llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
186   mcStreamer.reset(target->createMCObjectStreamer(
187       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
188       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
189       *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
190       /*DWARFMustBeAtTheEnd*/ false));
191   mcStreamer->setUseAssemblerInfoForParsing(true);
192 
193   std::unique_ptr<llvm::MCAsmParser> parser(
194       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
195   std::unique_ptr<llvm::MCTargetAsmParser> tap(
196       target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
197 
198   if (!tap) {
199     emitError(loc, "assembler initialization error");
200     return {};
201   }
202 
203   parser->setTargetParser(*tap);
204   parser->Run(false);
205 
206   return std::make_unique<SmallVector<char, 0>>(std::move(result));
207 }
208 
209 std::unique_ptr<std::vector<char>>
210 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
211   auto loc = getOperation().getLoc();
212 
213   // Save the ISA binary to a temp file.
214   int tempIsaBinaryFd = -1;
215   SmallString<128> tempIsaBinaryFilename;
216   if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
217                                          tempIsaBinaryFilename)) {
218     emitError(loc, "temporary file for ISA binary creation error");
219     return {};
220   }
221   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
222   llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
223   tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
224   tempIsaBinaryOs.close();
225 
226   // Create a temp file for HSA code object.
227   int tempHsacoFD = -1;
228   SmallString<128> tempHsacoFilename;
229   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
230                                          tempHsacoFilename)) {
231     emitError(loc, "temporary file for HSA code object creation error");
232     return {};
233   }
234   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
235 
236   {
237     static std::mutex mutex;
238     const std::lock_guard<std::mutex> lock(mutex);
239     // Invoke lld. Expect a true return value from lld.
240     if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
241                          "-o", tempHsacoFilename.c_str()},
242                         /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
243       emitError(loc, "lld invocation error");
244       return {};
245     }
246   }
247 
248   // Load the HSA code object.
249   auto hsacoFile = openInputFile(tempHsacoFilename);
250   if (!hsacoFile) {
251     emitError(loc, "read HSA code object from temp file error");
252     return {};
253   }
254 
255   StringRef buffer = hsacoFile->getBuffer();
256   return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
257 }
258 
259 std::unique_ptr<std::vector<char>>
260 SerializeToHsacoPass::serializeISA(const std::string &isa) {
261   auto isaBinary = assembleIsa(isa);
262   if (!isaBinary)
263     return {};
264   return createHsaco(*isaBinary);
265 }
266 
267 // Register pass to serialize GPU kernel functions to a HSACO binary annotation.
268 void mlir::registerGpuSerializeToHsacoPass() {
269   PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
270       "gpu-to-hsaco", "Lower GPU kernel function to HSACO binary annotations",
271       [] {
272         // Initialize LLVM AMDGPU backend.
273         LLVMInitializeAMDGPUAsmParser();
274         LLVMInitializeAMDGPUAsmPrinter();
275         LLVMInitializeAMDGPUTarget();
276         LLVMInitializeAMDGPUTargetInfo();
277         LLVMInitializeAMDGPUTargetMC();
278 
279         return std::make_unique<SerializeToHsacoPass>();
280       });
281 }
282 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
// Fallback used when MLIR_GPU_TO_HSACO_PASS_ENABLE is not set: the function
// still exists so callers can reference it, but it registers nothing.
void mlir::registerGpuSerializeToHsacoPass() {}
284 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
285