1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into HSAco blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 #include "mlir/IR/Location.h" 15 #include "mlir/IR/MLIRContext.h" 16 17 #if MLIR_GPU_TO_HSACO_PASS_ENABLE 18 #include "mlir/ExecutionEngine/OptUtils.h" 19 #include "mlir/Pass/Pass.h" 20 #include "mlir/Support/FileUtilities.h" 21 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" 22 #include "mlir/Target/LLVMIR/Export.h" 23 24 #include "llvm/MC/MCAsmBackend.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCCodeEmitter.h" 27 #include "llvm/MC/MCContext.h" 28 #include "llvm/MC/MCObjectFileInfo.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/FileUtilities.h" 36 #include "llvm/Support/LineIterator.h" 37 #include "llvm/Support/Program.h" 38 #include "llvm/Support/SourceMgr.h" 39 #include "llvm/Support/TargetSelect.h" 40 #include "llvm/Support/WithColor.h" 41 42 #include "llvm/Target/TargetMachine.h" 43 #include "llvm/Target/TargetOptions.h" 44 45 #include "lld/Common/Driver.h" 46 47 #include "hip/hip_version.h" 48 49 #include <mutex> 50 51 using namespace mlir; 52 53 namespace { 54 class SerializeToHsacoPass 55 : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { 56 public: 57 SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, 58 int optLevel); 59 SerializeToHsacoPass(const SerializeToHsacoPass &other); 60 StringRef getArgument() const override { return "gpu-to-hsaco"; } 61 StringRef getDescription() const override { 62 return "Lower GPU kernel function to HSACO binary annotations"; 63 } 64 65 protected: 66 Option<int> optLevel{ 67 *this, "opt-level", 68 llvm::cl::desc("Optimization level for HSACO compilation"), 69 llvm::cl::init(2)}; 70 71 /// Adds LLVM optimization passes 72 LogicalResult optimizeLlvm(llvm::Module &llvmModule, 73 llvm::TargetMachine &targetMachine) override; 74 75 private: 76 void getDependentDialects(DialectRegistry ®istry) const override; 77 78 // Serializes ROCDL to HSACO. 79 std::unique_ptr<std::vector<char>> 80 serializeISA(const std::string &isa) override; 81 82 std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa); 83 std::unique_ptr<std::vector<char>> 84 createHsaco(const SmallVectorImpl<char> &isaBinary); 85 }; 86 } // namespace 87 88 SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other) 89 : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {} 90 static std::string getDefaultChip() { 91 const char kDefaultChip[] = "gfx900"; 92 93 // Locate rocm_agent_enumerator. 94 const char kRocmAgentEnumerator[] = "rocm_agent_enumerator"; 95 llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName( 96 kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"}); 97 if (!rocmAgentEnumerator) { 98 llvm::WithColor::warning(llvm::errs()) 99 << kRocmAgentEnumerator << "couldn't be located under " << __ROCM_PATH__ 100 << "/bin\n"; 101 return kDefaultChip; 102 } 103 104 // Prepare temp file to hold the outputs. 105 int tempFd = -1; 106 SmallString<128> tempFilename; 107 if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd, 108 tempFilename)) { 109 llvm::WithColor::warning(llvm::errs()) 110 << "temporary file for " << kRocmAgentEnumerator << " creation error\n"; 111 return kDefaultChip; 112 } 113 llvm::FileRemover cleanup(tempFilename); 114 115 // Invoke rocm_agent_enumerator. 116 std::string errorMessage; 117 SmallVector<StringRef, 2> args{"-t", "GPU"}; 118 Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}}; 119 int result = 120 llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None, 121 redirects, 0, 0, &errorMessage); 122 if (result) { 123 llvm::WithColor::warning(llvm::errs()) 124 << kRocmAgentEnumerator << " invocation error: " << errorMessage 125 << "\n"; 126 return kDefaultChip; 127 } 128 129 // Load and parse the result. 130 auto gfxIsaList = openInputFile(tempFilename); 131 if (!gfxIsaList) { 132 llvm::WithColor::error(llvm::errs()) 133 << "read ROCm agent list temp file error\n"; 134 return kDefaultChip; 135 } 136 for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) { 137 // Skip the line with content "gfx000". 138 if (*lines == "gfx000") 139 continue; 140 // Use the first ISA version found. 141 return lines->str(); 142 } 143 144 return kDefaultChip; 145 } 146 147 // Sets the 'option' to 'value' unless it already has a value. 148 static void maybeSetOption(Pass::Option<std::string> &option, 149 function_ref<std::string()> getValue) { 150 if (!option.hasValue()) 151 option = getValue(); 152 } 153 154 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, 155 StringRef features, int optLevel) { 156 maybeSetOption(this->triple, [&triple] { return triple.str(); }); 157 maybeSetOption(this->chip, [&arch] { return arch.str(); }); 158 maybeSetOption(this->features, [&features] { return features.str(); }); 159 if (this->optLevel.getNumOccurrences() == 0) 160 this->optLevel.setValue(optLevel); 161 } 162 163 void SerializeToHsacoPass::getDependentDialects( 164 DialectRegistry ®istry) const { 165 registerROCDLDialectTranslation(registry); 166 gpu::SerializeToBlobPass::getDependentDialects(registry); 167 } 168 169 LogicalResult 170 SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule, 171 llvm::TargetMachine &targetMachine) { 172 int optLevel = this->optLevel.getValue(); 173 if (optLevel < 0 || optLevel > 3) 174 return getOperation().emitError() 175 << "Invalid HSA optimization level" << optLevel << "\n"; 176 177 targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel)); 178 179 auto transformer = 180 makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); 181 auto error = transformer(&llvmModule); 182 if (error) { 183 InFlightDiagnostic mlirError = getOperation()->emitError(); 184 llvm::handleAllErrors( 185 std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { 186 mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n"; 187 }); 188 return mlirError; 189 } 190 return success(); 191 } 192 193 std::unique_ptr<SmallVectorImpl<char>> 194 SerializeToHsacoPass::assembleIsa(const std::string &isa) { 195 auto loc = getOperation().getLoc(); 196 197 SmallVector<char, 0> result; 198 llvm::raw_svector_ostream os(result); 199 200 llvm::Triple triple(llvm::Triple::normalize(this->triple)); 201 std::string error; 202 const llvm::Target *target = 203 llvm::TargetRegistry::lookupTarget(triple.normalize(), error); 204 if (!target) { 205 emitError(loc, Twine("failed to lookup target: ") + error); 206 return {}; 207 } 208 209 llvm::SourceMgr srcMgr; 210 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), 211 llvm::SMLoc()); 212 213 const llvm::MCTargetOptions mcOptions; 214 std::unique_ptr<llvm::MCRegisterInfo> mri( 215 target->createMCRegInfo(this->triple)); 216 std::unique_ptr<llvm::MCAsmInfo> mai( 217 target->createMCAsmInfo(*mri, this->triple, mcOptions)); 218 mai->setRelaxELFRelocations(true); 219 std::unique_ptr<llvm::MCSubtargetInfo> sti( 220 target->createMCSubtargetInfo(this->triple, this->chip, this->features)); 221 222 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, 223 &mcOptions); 224 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( 225 ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); 226 ctx.setObjectFileInfo(mofi.get()); 227 228 SmallString<128> cwd; 229 if (!llvm::sys::fs::current_path(cwd)) 230 ctx.setCompilationDir(cwd); 231 232 std::unique_ptr<llvm::MCStreamer> mcStreamer; 233 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); 234 235 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx); 236 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); 237 mcStreamer.reset(target->createMCObjectStreamer( 238 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), 239 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), 240 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, 241 /*DWARFMustBeAtTheEnd*/ false)); 242 mcStreamer->setUseAssemblerInfoForParsing(true); 243 244 std::unique_ptr<llvm::MCAsmParser> parser( 245 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); 246 std::unique_ptr<llvm::MCTargetAsmParser> tap( 247 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); 248 249 if (!tap) { 250 emitError(loc, "assembler initialization error"); 251 return {}; 252 } 253 254 parser->setTargetParser(*tap); 255 parser->Run(false); 256 257 return std::make_unique<SmallVector<char, 0>>(std::move(result)); 258 } 259 260 std::unique_ptr<std::vector<char>> 261 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) { 262 auto loc = getOperation().getLoc(); 263 264 // Save the ISA binary to a temp file. 265 int tempIsaBinaryFd = -1; 266 SmallString<128> tempIsaBinaryFilename; 267 if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, 268 tempIsaBinaryFilename)) { 269 emitError(loc, "temporary file for ISA binary creation error"); 270 return {}; 271 } 272 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); 273 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); 274 tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); 275 tempIsaBinaryOs.close(); 276 277 // Create a temp file for HSA code object. 278 int tempHsacoFD = -1; 279 SmallString<128> tempHsacoFilename; 280 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, 281 tempHsacoFilename)) { 282 emitError(loc, "temporary file for HSA code object creation error"); 283 return {}; 284 } 285 llvm::FileRemover cleanupHsaco(tempHsacoFilename); 286 287 { 288 static std::mutex mutex; 289 const std::lock_guard<std::mutex> lock(mutex); 290 // Invoke lld. Expect a true return value from lld. 291 if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), 292 "-o", tempHsacoFilename.c_str()}, 293 /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) { 294 emitError(loc, "lld invocation error"); 295 return {}; 296 } 297 } 298 299 // Load the HSA code object. 300 auto hsacoFile = openInputFile(tempHsacoFilename); 301 if (!hsacoFile) { 302 emitError(loc, "read HSA code object from temp file error"); 303 return {}; 304 } 305 306 StringRef buffer = hsacoFile->getBuffer(); 307 return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); 308 } 309 310 std::unique_ptr<std::vector<char>> 311 SerializeToHsacoPass::serializeISA(const std::string &isa) { 312 auto isaBinary = assembleIsa(isa); 313 if (!isaBinary) 314 return {}; 315 return createHsaco(*isaBinary); 316 } 317 318 // Register pass to serialize GPU kernel functions to a HSACO binary annotation. 319 void mlir::registerGpuSerializeToHsacoPass() { 320 PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO( 321 [] { 322 // Initialize LLVM AMDGPU backend. 323 LLVMInitializeAMDGPUAsmParser(); 324 LLVMInitializeAMDGPUAsmPrinter(); 325 LLVMInitializeAMDGPUTarget(); 326 LLVMInitializeAMDGPUTargetInfo(); 327 LLVMInitializeAMDGPUTargetMC(); 328 329 return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "", 330 "", 2); 331 }); 332 } 333 #else // MLIR_GPU_TO_HSACO_PASS_ENABLE 334 void mlir::registerGpuSerializeToHsacoPass() {} 335 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE 336