1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into HSAco blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 15 #if MLIR_GPU_TO_HSACO_PASS_ENABLE 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Support/FileUtilities.h" 18 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" 19 #include "mlir/Target/LLVMIR/Export.h" 20 21 #include "llvm/MC/MCAsmBackend.h" 22 #include "llvm/MC/MCAsmInfo.h" 23 #include "llvm/MC/MCCodeEmitter.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCObjectFileInfo.h" 26 #include "llvm/MC/MCObjectWriter.h" 27 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 31 #include "llvm/MC/TargetRegistry.h" 32 #include "llvm/Support/FileUtilities.h" 33 #include "llvm/Support/LineIterator.h" 34 #include "llvm/Support/Program.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/WithColor.h" 37 #include "llvm/Target/TargetOptions.h" 38 39 #include "lld/Common/Driver.h" 40 41 #include "hip/hip_version.h" 42 43 #include <mutex> 44 45 using namespace mlir; 46 47 namespace { 48 class SerializeToHsacoPass 49 : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { 50 public: 51 SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features); 52 StringRef getArgument() const override { return "gpu-to-hsaco"; } 53 StringRef getDescription() const override { 54 return "Lower GPU kernel function to HSACO binary annotations"; 55 } 56 57 private: 58 void getDependentDialects(DialectRegistry ®istry) const override; 59 60 // Serializes ROCDL to HSACO. 61 std::unique_ptr<std::vector<char>> 62 serializeISA(const std::string &isa) override; 63 64 std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa); 65 std::unique_ptr<std::vector<char>> 66 createHsaco(const SmallVectorImpl<char> &isaBinary); 67 }; 68 } // namespace 69 70 static std::string getDefaultChip() { 71 const char kDefaultChip[] = "gfx900"; 72 73 // Locate rocm_agent_enumerator. 74 const char kRocmAgentEnumerator[] = "rocm_agent_enumerator"; 75 llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName( 76 kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"}); 77 if (!rocmAgentEnumerator) { 78 llvm::WithColor::warning(llvm::errs()) 79 << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__ 80 << "/bin\n"; 81 return kDefaultChip; 82 } 83 84 // Prepare temp file to hold the outputs. 85 int tempFd = -1; 86 SmallString<128> tempFilename; 87 if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd, 88 tempFilename)) { 89 llvm::WithColor::warning(llvm::errs()) 90 << "temporary file for " << kRocmAgentEnumerator << " creation error\n"; 91 return kDefaultChip; 92 } 93 llvm::FileRemover cleanup(tempFilename); 94 95 // Invoke rocm_agent_enumerator. 96 std::string errorMessage; 97 SmallVector<StringRef, 2> args{"-t", "GPU"}; 98 Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}}; 99 int result = 100 llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None, 101 redirects, 0, 0, &errorMessage); 102 if (result) { 103 llvm::WithColor::warning(llvm::errs()) 104 << kRocmAgentEnumerator << " invocation error: " << errorMessage 105 << "\n"; 106 return kDefaultChip; 107 } 108 109 // Load and parse the result. 110 auto gfxIsaList = openInputFile(tempFilename); 111 if (!gfxIsaList) { 112 llvm::WithColor::error(llvm::errs()) 113 << "read ROCm agent list temp file error\n"; 114 return kDefaultChip; 115 } 116 for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) { 117 // Skip the line with content "gfx000". 118 if (*lines == "gfx000") 119 continue; 120 // Use the first ISA version found. 121 return lines->str(); 122 } 123 124 return kDefaultChip; 125 } 126 127 // Sets the 'option' to 'value' unless it already has a value. 128 static void maybeSetOption(Pass::Option<std::string> &option, 129 function_ref<std::string()> getValue) { 130 if (!option.hasValue()) 131 option = getValue(); 132 } 133 134 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, 135 StringRef features) { 136 maybeSetOption(this->triple, [&triple] { return triple.str(); }); 137 maybeSetOption(this->chip, [&arch] { return arch.str(); }); 138 maybeSetOption(this->features, [&features] { return features.str(); }); 139 } 140 141 void SerializeToHsacoPass::getDependentDialects( 142 DialectRegistry ®istry) const { 143 registerROCDLDialectTranslation(registry); 144 gpu::SerializeToBlobPass::getDependentDialects(registry); 145 } 146 147 std::unique_ptr<SmallVectorImpl<char>> 148 SerializeToHsacoPass::assembleIsa(const std::string &isa) { 149 auto loc = getOperation().getLoc(); 150 151 SmallVector<char, 0> result; 152 llvm::raw_svector_ostream os(result); 153 154 llvm::Triple triple(llvm::Triple::normalize(this->triple)); 155 std::string error; 156 const llvm::Target *target = 157 llvm::TargetRegistry::lookupTarget(triple.normalize(), error); 158 if (!target) { 159 emitError(loc, Twine("failed to lookup target: ") + error); 160 return {}; 161 } 162 163 llvm::SourceMgr srcMgr; 164 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), 165 llvm::SMLoc()); 166 167 const llvm::MCTargetOptions mcOptions; 168 std::unique_ptr<llvm::MCRegisterInfo> mri( 169 target->createMCRegInfo(this->triple)); 170 std::unique_ptr<llvm::MCAsmInfo> mai( 171 target->createMCAsmInfo(*mri, this->triple, mcOptions)); 172 mai->setRelaxELFRelocations(true); 173 174 llvm::MCContext ctx(triple, mai.get(), mri.get(), &srcMgr, &mcOptions); 175 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( 176 ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); 177 ctx.setObjectFileInfo(mofi.get()); 178 179 SmallString<128> cwd; 180 if (!llvm::sys::fs::current_path(cwd)) 181 ctx.setCompilationDir(cwd); 182 183 std::unique_ptr<llvm::MCStreamer> mcStreamer; 184 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); 185 std::unique_ptr<llvm::MCSubtargetInfo> sti( 186 target->createMCSubtargetInfo(this->triple, this->chip, this->features)); 187 188 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx); 189 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); 190 mcStreamer.reset(target->createMCObjectStreamer( 191 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), 192 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), 193 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, 194 /*DWARFMustBeAtTheEnd*/ false)); 195 mcStreamer->setUseAssemblerInfoForParsing(true); 196 197 std::unique_ptr<llvm::MCAsmParser> parser( 198 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); 199 std::unique_ptr<llvm::MCTargetAsmParser> tap( 200 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); 201 202 if (!tap) { 203 emitError(loc, "assembler initialization error"); 204 return {}; 205 } 206 207 parser->setTargetParser(*tap); 208 parser->Run(false); 209 210 return std::make_unique<SmallVector<char, 0>>(std::move(result)); 211 } 212 213 std::unique_ptr<std::vector<char>> 214 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) { 215 auto loc = getOperation().getLoc(); 216 217 // Save the ISA binary to a temp file. 218 int tempIsaBinaryFd = -1; 219 SmallString<128> tempIsaBinaryFilename; 220 if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, 221 tempIsaBinaryFilename)) { 222 emitError(loc, "temporary file for ISA binary creation error"); 223 return {}; 224 } 225 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); 226 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); 227 tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); 228 tempIsaBinaryOs.close(); 229 230 // Create a temp file for HSA code object. 231 int tempHsacoFD = -1; 232 SmallString<128> tempHsacoFilename; 233 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, 234 tempHsacoFilename)) { 235 emitError(loc, "temporary file for HSA code object creation error"); 236 return {}; 237 } 238 llvm::FileRemover cleanupHsaco(tempHsacoFilename); 239 240 { 241 static std::mutex mutex; 242 const std::lock_guard<std::mutex> lock(mutex); 243 // Invoke lld. Expect a true return value from lld. 244 if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), 245 "-o", tempHsacoFilename.c_str()}, 246 /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) { 247 emitError(loc, "lld invocation error"); 248 return {}; 249 } 250 } 251 252 // Load the HSA code object. 253 auto hsacoFile = openInputFile(tempHsacoFilename); 254 if (!hsacoFile) { 255 emitError(loc, "read HSA code object from temp file error"); 256 return {}; 257 } 258 259 StringRef buffer = hsacoFile->getBuffer(); 260 return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); 261 } 262 263 std::unique_ptr<std::vector<char>> 264 SerializeToHsacoPass::serializeISA(const std::string &isa) { 265 auto isaBinary = assembleIsa(isa); 266 if (!isaBinary) 267 return {}; 268 return createHsaco(*isaBinary); 269 } 270 271 // Register pass to serialize GPU kernel functions to a HSACO binary annotation. 272 void mlir::registerGpuSerializeToHsacoPass() { 273 PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO( 274 [] { 275 // Initialize LLVM AMDGPU backend. 276 LLVMInitializeAMDGPUAsmParser(); 277 LLVMInitializeAMDGPUAsmPrinter(); 278 LLVMInitializeAMDGPUTarget(); 279 LLVMInitializeAMDGPUTargetInfo(); 280 LLVMInitializeAMDGPUTargetMC(); 281 282 return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "", 283 ""); 284 }); 285 } 286 #else // MLIR_GPU_TO_HSACO_PASS_ENABLE 287 void mlir::registerGpuSerializeToHsacoPass() {} 288 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE 289