1 //===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass that serializes a gpu module into HSAco blob and 10 // adds that blob as a string attribute of the module. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "mlir/Dialect/GPU/Passes.h" 14 #include "mlir/IR/Location.h" 15 #include "mlir/IR/MLIRContext.h" 16 17 #if MLIR_GPU_TO_HSACO_PASS_ENABLE 18 #include "mlir/ExecutionEngine/OptUtils.h" 19 #include "mlir/Pass/Pass.h" 20 #include "mlir/Support/FileUtilities.h" 21 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" 22 #include "mlir/Target/LLVMIR/Export.h" 23 24 #include "llvm/MC/MCAsmBackend.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCCodeEmitter.h" 27 #include "llvm/MC/MCContext.h" 28 #include "llvm/MC/MCObjectFileInfo.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/TargetRegistry.h" 34 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/FileUtilities.h" 37 #include "llvm/Support/Program.h" 38 #include "llvm/Support/SourceMgr.h" 39 #include "llvm/Support/TargetSelect.h" 40 #include "llvm/Support/WithColor.h" 41 42 #include "llvm/Target/TargetMachine.h" 43 #include "llvm/Target/TargetOptions.h" 44 45 #include "lld/Common/Driver.h" 46 47 #include <mutex> 48 49 using namespace mlir; 50 51 namespace { 52 class SerializeToHsacoPass 53 : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { 54 public: 55 SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, 56 int optLevel); 57 SerializeToHsacoPass(const SerializeToHsacoPass &other); 58 StringRef getArgument() const override { return "gpu-to-hsaco"; } 59 StringRef getDescription() const override { 60 return "Lower GPU kernel function to HSACO binary annotations"; 61 } 62 63 protected: 64 Option<int> optLevel{ 65 *this, "opt-level", 66 llvm::cl::desc("Optimization level for HSACO compilation"), 67 llvm::cl::init(2)}; 68 69 Option<std::string> rocmPath{*this, "rocm-path", 70 llvm::cl::desc("Path to ROCm install")}; 71 72 /// Adds LLVM optimization passes 73 LogicalResult optimizeLlvm(llvm::Module &llvmModule, 74 llvm::TargetMachine &targetMachine) override; 75 76 private: 77 void getDependentDialects(DialectRegistry ®istry) const override; 78 79 // Serializes ROCDL to HSACO. 80 std::unique_ptr<std::vector<char>> 81 serializeISA(const std::string &isa) override; 82 83 std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa); 84 std::unique_ptr<std::vector<char>> 85 createHsaco(const SmallVectorImpl<char> &isaBinary); 86 87 std::string getRocmPath(); 88 }; 89 } // end namespace 90 91 SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other) 92 : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {} 93 94 /// Get a user-specified path to ROCm 95 // Tries, in order, the --rocm-path option, the ROCM_PATH environment variable 96 // and a compile-time default 97 std::string SerializeToHsacoPass::getRocmPath() { 98 if (rocmPath.getNumOccurrences() > 0) 99 return rocmPath.getValue(); 100 101 return __DEFAULT_ROCM_PATH__; 102 } 103 104 // Sets the 'option' to 'value' unless it already has a value. 105 static void maybeSetOption(Pass::Option<std::string> &option, 106 function_ref<std::string()> getValue) { 107 if (!option.hasValue()) 108 option = getValue(); 109 } 110 111 SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, 112 StringRef features, int optLevel) { 113 maybeSetOption(this->triple, [&triple] { return triple.str(); }); 114 maybeSetOption(this->chip, [&arch] { return arch.str(); }); 115 maybeSetOption(this->features, [&features] { return features.str(); }); 116 if (this->optLevel.getNumOccurrences() == 0) 117 this->optLevel.setValue(optLevel); 118 } 119 120 void SerializeToHsacoPass::getDependentDialects( 121 DialectRegistry ®istry) const { 122 registerROCDLDialectTranslation(registry); 123 gpu::SerializeToBlobPass::getDependentDialects(registry); 124 } 125 126 LogicalResult 127 SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule, 128 llvm::TargetMachine &targetMachine) { 129 int optLevel = this->optLevel.getValue(); 130 if (optLevel < 0 || optLevel > 3) 131 return getOperation().emitError() 132 << "Invalid HSA optimization level" << optLevel << "\n"; 133 134 targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel)); 135 136 auto transformer = 137 makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); 138 auto error = transformer(&llvmModule); 139 if (error) { 140 InFlightDiagnostic mlirError = getOperation()->emitError(); 141 llvm::handleAllErrors( 142 std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { 143 mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n"; 144 }); 145 return mlirError; 146 } 147 return success(); 148 } 149 150 std::unique_ptr<SmallVectorImpl<char>> 151 SerializeToHsacoPass::assembleIsa(const std::string &isa) { 152 auto loc = getOperation().getLoc(); 153 154 SmallVector<char, 0> result; 155 llvm::raw_svector_ostream os(result); 156 157 llvm::Triple triple(llvm::Triple::normalize(this->triple)); 158 std::string error; 159 const llvm::Target *target = 160 llvm::TargetRegistry::lookupTarget(triple.normalize(), error); 161 if (!target) { 162 emitError(loc, Twine("failed to lookup target: ") + error); 163 return {}; 164 } 165 166 llvm::SourceMgr srcMgr; 167 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), 168 llvm::SMLoc()); 169 170 const llvm::MCTargetOptions mcOptions; 171 std::unique_ptr<llvm::MCRegisterInfo> mri( 172 target->createMCRegInfo(this->triple)); 173 std::unique_ptr<llvm::MCAsmInfo> mai( 174 target->createMCAsmInfo(*mri, this->triple, mcOptions)); 175 mai->setRelaxELFRelocations(true); 176 std::unique_ptr<llvm::MCSubtargetInfo> sti( 177 target->createMCSubtargetInfo(this->triple, this->chip, this->features)); 178 179 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, 180 &mcOptions); 181 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( 182 ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); 183 ctx.setObjectFileInfo(mofi.get()); 184 185 SmallString<128> cwd; 186 if (!llvm::sys::fs::current_path(cwd)) 187 ctx.setCompilationDir(cwd); 188 189 std::unique_ptr<llvm::MCStreamer> mcStreamer; 190 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); 191 192 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx); 193 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); 194 mcStreamer.reset(target->createMCObjectStreamer( 195 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), 196 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), 197 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, 198 /*DWARFMustBeAtTheEnd*/ false)); 199 mcStreamer->setUseAssemblerInfoForParsing(true); 200 201 std::unique_ptr<llvm::MCAsmParser> parser( 202 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); 203 std::unique_ptr<llvm::MCTargetAsmParser> tap( 204 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); 205 206 if (!tap) { 207 emitError(loc, "assembler initialization error"); 208 return {}; 209 } 210 211 parser->setTargetParser(*tap); 212 parser->Run(false); 213 214 return std::make_unique<SmallVector<char, 0>>(std::move(result)); 215 } 216 217 std::unique_ptr<std::vector<char>> 218 SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) { 219 auto loc = getOperation().getLoc(); 220 221 // Save the ISA binary to a temp file. 222 int tempIsaBinaryFd = -1; 223 SmallString<128> tempIsaBinaryFilename; 224 if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, 225 tempIsaBinaryFilename)) { 226 emitError(loc, "temporary file for ISA binary creation error"); 227 return {}; 228 } 229 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); 230 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); 231 tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); 232 tempIsaBinaryOs.close(); 233 234 // Create a temp file for HSA code object. 235 int tempHsacoFD = -1; 236 SmallString<128> tempHsacoFilename; 237 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, 238 tempHsacoFilename)) { 239 emitError(loc, "temporary file for HSA code object creation error"); 240 return {}; 241 } 242 llvm::FileRemover cleanupHsaco(tempHsacoFilename); 243 244 { 245 static std::mutex mutex; 246 const std::lock_guard<std::mutex> lock(mutex); 247 // Invoke lld. Expect a true return value from lld. 248 if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), 249 "-o", tempHsacoFilename.c_str()}, 250 /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) { 251 emitError(loc, "lld invocation error"); 252 return {}; 253 } 254 } 255 256 // Load the HSA code object. 257 auto hsacoFile = openInputFile(tempHsacoFilename); 258 if (!hsacoFile) { 259 emitError(loc, "read HSA code object from temp file error"); 260 return {}; 261 } 262 263 StringRef buffer = hsacoFile->getBuffer(); 264 return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); 265 } 266 267 std::unique_ptr<std::vector<char>> 268 SerializeToHsacoPass::serializeISA(const std::string &isa) { 269 auto isaBinary = assembleIsa(isa); 270 if (!isaBinary) 271 return {}; 272 return createHsaco(*isaBinary); 273 } 274 275 // Register pass to serialize GPU kernel functions to a HSACO binary annotation. 276 void mlir::registerGpuSerializeToHsacoPass() { 277 PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO( 278 [] { 279 // Initialize LLVM AMDGPU backend. 280 LLVMInitializeAMDGPUAsmParser(); 281 LLVMInitializeAMDGPUAsmPrinter(); 282 LLVMInitializeAMDGPUTarget(); 283 LLVMInitializeAMDGPUTargetInfo(); 284 LLVMInitializeAMDGPUTargetMC(); 285 286 return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "", 287 "", 2); 288 }); 289 } 290 #else // MLIR_GPU_TO_HSACO_PASS_ENABLE 291 void mlir::registerGpuSerializeToHsacoPass() {} 292 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE 293