1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into HSAco blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 15 #if MLIR_GPU_TO_HSACO_PASS_ENABLE 16 #include "mlir/Pass/Pass.h" 17 #include "mlir/Support/FileUtilities.h" 18 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" 19 #include "mlir/Target/LLVMIR/Export.h" 20 21 #include "llvm/MC/MCAsmBackend.h" 22 #include "llvm/MC/MCAsmInfo.h" 23 #include "llvm/MC/MCCodeEmitter.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCObjectFileInfo.h" 26 #include "llvm/MC/MCObjectWriter.h" 27 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 31 #include "llvm/Support/FileUtilities.h" 32 #include "llvm/Support/LineIterator.h" 33 #include "llvm/Support/Program.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/TargetSelect.h" 36 #include "llvm/Support/WithColor.h" 37 #include "llvm/Target/TargetOptions.h" 38 39 #include "lld/Common/Driver.h" 40 41 #include "hip/hip_version.h" 42 43 #include <mutex> 44 45 using namespace mlir; 46 47 namespace { 48 class SerializeToHsacoPass 49 : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { 50 public: 51 SerializeToHsacoPass(); 52 53 private: 54 void getDependentDialects(DialectRegistry ®istry) const override; 55 56 // Serializes ROCDL to HSACO. 57 std::unique_ptr<std::vector<char>> 58 serializeISA(const std::string &isa) override; 59 60 std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa); 61 std::unique_ptr<std::vector<char>> 62 createHsaco(const SmallVectorImpl<char> &isaBinary); 63 }; 64 } // namespace 65 66 static std::string getDefaultChip() { 67 const char kDefaultChip[] = "gfx900"; 68 69 // Locate rocm_agent_enumerator. 70 const char kRocmAgentEnumerator[] = "rocm_agent_enumerator"; 71 llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName( 72 kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"}); 73 if (!rocmAgentEnumerator) { 74 llvm::WithColor::warning(llvm::errs()) 75 << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__ 76 << "/bin\n"; 77 return kDefaultChip; 78 } 79 80 // Prepare temp file to hold the outputs. 81 int tempFd = -1; 82 SmallString<128> tempFilename; 83 if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd, 84 tempFilename)) { 85 llvm::WithColor::warning(llvm::errs()) 86 << "temporary file for " << kRocmAgentEnumerator << " creation error\n"; 87 return kDefaultChip; 88 } 89 llvm::FileRemover cleanup(tempFilename); 90 91 // Invoke rocm_agent_enumerator. 92 std::string errorMessage; 93 SmallVector<StringRef, 2> args{"-t", "GPU"}; 94 Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}}; 95 int result = 96 llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None, 97 redirects, 0, 0, &errorMessage); 98 if (result) { 99 llvm::WithColor::warning(llvm::errs()) 100 << kRocmAgentEnumerator << " invocation error: " << errorMessage 101 << "\n"; 102 return kDefaultChip; 103 } 104 105 // Load and parse the result. 106 auto gfxIsaList = openInputFile(tempFilename); 107 if (!gfxIsaList) { 108 llvm::WithColor::error(llvm::errs()) 109 << "read ROCm agent list temp file error\n"; 110 return kDefaultChip; 111 } 112 for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) { 113 // Skip the line with content "gfx000". 114 if (*lines == "gfx000") 115 continue; 116 // Use the first ISA version found. 117 return lines->str(); 118 } 119 120 return kDefaultChip; 121 } 122 123 // Sets the 'option' to 'value' unless it already has a value. 124 static void maybeSetOption(Pass::Option<std::string> &option, 125 function_ref<std::string()> getValue) { 126 if (!option.hasValue()) 127 option = getValue(); 128 } 129 130 SerializeToHsacoPass::SerializeToHsacoPass() { 131 maybeSetOption(this->triple, [] { return "amdgcn-amd-amdhsa"; }); 132 maybeSetOption(this->chip, [] { 133 static auto chip = getDefaultChip(); 134 return chip; 135 }); 136 } 137 138 void SerializeToHsacoPass::getDependentDialects( 139 DialectRegistry ®istry) const { 140 registerROCDLDialectTranslation(registry); 141 gpu::SerializeToBlobPass::getDependentDialects(registry); 142 } 143 144 std::unique_ptr<SmallVectorImpl<char>> 145 SerializeToHsacoPass::assembleIsa(const std::string &isa) { 146 auto loc = getOperation().getLoc(); 147 148 SmallVector<char, 0> result; 149 llvm::raw_svector_ostream os(result); 150 151 llvm::Triple triple(llvm::Triple::normalize(this->triple)); 152 std::string error; 153 const llvm::Target *target = 154 llvm::TargetRegistry::lookupTarget(triple.normalize(), error); 155 if (!target) { 156 emitError(loc, Twine("failed to lookup target: ") + error); 157 return {}; 158 } 159 160 llvm::SourceMgr srcMgr; 161 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), 162 llvm::SMLoc()); 163 164 const llvm::MCTargetOptions mcOptions; 165 std::unique_ptr<llvm::MCRegisterInfo> mri( 166 target->createMCRegInfo(this->triple)); 167 std::unique_ptr<llvm::MCAsmInfo> mai( 168 target->createMCAsmInfo(*mri, this->triple, mcOptions)); 169 mai->setRelaxELFRelocations(true); 170 171 llvm::MCContext ctx(triple, mai.get(), mri.get(), &srcMgr, &mcOptions); 172 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( 173 ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); 174 ctx.setObjectFileInfo(mofi.get()); 175 176 SmallString<128> cwd; 177 if (!llvm::sys::fs::current_path(cwd)) 178 ctx.setCompilationDir(cwd); 179 180 std::unique_ptr<llvm::MCStreamer> mcStreamer; 181 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); 182 std::unique_ptr<llvm::MCSubtargetInfo> sti( 183 target->createMCSubtargetInfo(this->triple, this->chip, this->features)); 184 185 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx); 186 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); 187 mcStreamer.reset(target->createMCObjectStreamer( 188 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), 189 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), 190 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, 191 /*DWARFMustBeAtTheEnd*/ false)); 192 mcStreamer->setUseAssemblerInfoForParsing(true); 193 194 std::unique_ptr<llvm::MCAsmParser> parser( 195 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); 196 std::unique_ptr<llvm::MCTargetAsmParser> tap( 197 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); 198 199 if (!tap) { 200 emitError(loc, "assembler initialization error"); 201 return {}; 202 } 203 204 parser->setTargetParser(*tap); 205 parser->Run(false); 206 207 return std::make_unique<SmallVector<char, 0>>(std::move(result)); 208 } 209 210 std::unique_ptr<std::vector<char>> 211 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) { 212 auto loc = getOperation().getLoc(); 213 214 // Save the ISA binary to a temp file. 215 int tempIsaBinaryFd = -1; 216 SmallString<128> tempIsaBinaryFilename; 217 if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, 218 tempIsaBinaryFilename)) { 219 emitError(loc, "temporary file for ISA binary creation error"); 220 return {}; 221 } 222 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); 223 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); 224 tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); 225 tempIsaBinaryOs.close(); 226 227 // Create a temp file for HSA code object. 228 int tempHsacoFD = -1; 229 SmallString<128> tempHsacoFilename; 230 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, 231 tempHsacoFilename)) { 232 emitError(loc, "temporary file for HSA code object creation error"); 233 return {}; 234 } 235 llvm::FileRemover cleanupHsaco(tempHsacoFilename); 236 237 { 238 static std::mutex mutex; 239 const std::lock_guard<std::mutex> lock(mutex); 240 // Invoke lld. Expect a true return value from lld. 241 if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), 242 "-o", tempHsacoFilename.c_str()}, 243 /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) { 244 emitError(loc, "lld invocation error"); 245 return {}; 246 } 247 } 248 249 // Load the HSA code object. 250 auto hsacoFile = openInputFile(tempHsacoFilename); 251 if (!hsacoFile) { 252 emitError(loc, "read HSA code object from temp file error"); 253 return {}; 254 } 255 256 StringRef buffer = hsacoFile->getBuffer(); 257 return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); 258 } 259 260 std::unique_ptr<std::vector<char>> 261 SerializeToHsacoPass::serializeISA(const std::string &isa) { 262 auto isaBinary = assembleIsa(isa); 263 if (!isaBinary) 264 return {}; 265 return createHsaco(*isaBinary); 266 } 267 268 // Register pass to serialize GPU kernel functions to a HSACO binary annotation. 269 void mlir::registerGpuSerializeToHsacoPass() { 270 PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO( 271 "gpu-to-hsaco", "Lower GPU kernel function to HSACO binary annotations", 272 [] { 273 // Initialize LLVM AMDGPU backend. 274 LLVMInitializeAMDGPUAsmParser(); 275 LLVMInitializeAMDGPUAsmPrinter(); 276 LLVMInitializeAMDGPUTarget(); 277 LLVMInitializeAMDGPUTargetInfo(); 278 LLVMInitializeAMDGPUTargetMC(); 279 280 return std::make_unique<SerializeToHsacoPass>(); 281 }); 282 } 283 #else // MLIR_GPU_TO_HSACO_PASS_ENABLE 284 void mlir::registerGpuSerializeToHsacoPass() {} 285 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE 286