//===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a pass that serializes a gpu module into HSAco blob and // adds that blob as a string attribute of the module. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/Passes.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" #if MLIR_GPU_TO_HSACO_PASS_ENABLE #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/WithColor.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "lld/Common/Driver.h" #include using namespace mlir; namespace { class SerializeToHsacoPass : public PassWrapper { public: SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, int optLevel); SerializeToHsacoPass(const SerializeToHsacoPass &other); StringRef getArgument() const override { return "gpu-to-hsaco"; } StringRef getDescription() const override { return "Lower GPU kernel function to HSACO binary annotations"; } protected: Option optLevel{ *this, "opt-level", llvm::cl::desc("Optimization level for HSACO compilation"), llvm::cl::init(2)}; Option rocmPath{*this, "rocm-path", llvm::cl::desc("Path to ROCm install")}; /// Adds LLVM optimization passes LogicalResult optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine) override; private: void getDependentDialects(DialectRegistry ®istry) const override; // Serializes ROCDL to HSACO. std::unique_ptr> serializeISA(const std::string &isa) override; std::unique_ptr> assembleIsa(const std::string &isa); std::unique_ptr> createHsaco(const SmallVectorImpl &isaBinary); std::string getRocmPath(); }; } // end namespace SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other) : PassWrapper(other) {} /// Get a user-specified path to ROCm // Tries, in order, the --rocm-path option, the ROCM_PATH environment variable // and a compile-time default std::string SerializeToHsacoPass::getRocmPath() { if (rocmPath.getNumOccurrences() > 0) return rocmPath.getValue(); return __DEFAULT_ROCM_PATH__; } // Sets the 'option' to 'value' unless it already has a value. static void maybeSetOption(Pass::Option &option, function_ref getValue) { if (!option.hasValue()) option = getValue(); } SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, int optLevel) { maybeSetOption(this->triple, [&triple] { return triple.str(); }); maybeSetOption(this->chip, [&arch] { return arch.str(); }); maybeSetOption(this->features, [&features] { return features.str(); }); if (this->optLevel.getNumOccurrences() == 0) this->optLevel.setValue(optLevel); } void SerializeToHsacoPass::getDependentDialects( DialectRegistry ®istry) const { registerROCDLDialectTranslation(registry); gpu::SerializeToBlobPass::getDependentDialects(registry); } LogicalResult SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine) { int optLevel = this->optLevel.getValue(); if (optLevel < 0 || optLevel > 3) return getOperation().emitError() << "Invalid HSA optimization level" << optLevel << "\n"; targetMachine.setOptLevel(static_cast(optLevel)); auto transformer = makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); auto error = transformer(&llvmModule); if (error) { InFlightDiagnostic mlirError = getOperation()->emitError(); llvm::handleAllErrors( std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n"; }); return mlirError; } return success(); } std::unique_ptr> SerializeToHsacoPass::assembleIsa(const std::string &isa) { auto loc = getOperation().getLoc(); SmallVector result; llvm::raw_svector_ostream os(result); llvm::Triple triple(llvm::Triple::normalize(this->triple)); std::string error; const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple.normalize(), error); if (!target) { emitError(loc, Twine("failed to lookup target: ") + error); return {}; } llvm::SourceMgr srcMgr; srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), llvm::SMLoc()); const llvm::MCTargetOptions mcOptions; std::unique_ptr mri( target->createMCRegInfo(this->triple)); std::unique_ptr mai( target->createMCAsmInfo(*mri, this->triple, mcOptions)); mai->setRelaxELFRelocations(true); std::unique_ptr sti( target->createMCSubtargetInfo(this->triple, this->chip, this->features)); llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, &mcOptions); std::unique_ptr mofi(target->createMCObjectFileInfo( ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); ctx.setObjectFileInfo(mofi.get()); SmallString<128> cwd; if (!llvm::sys::fs::current_path(cwd)) ctx.setCompilationDir(cwd); std::unique_ptr mcStreamer; std::unique_ptr mcii(target->createMCInstrInfo()); llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx); llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); mcStreamer.reset(target->createMCObjectStreamer( triple, ctx, std::unique_ptr(mab), mab->createObjectWriter(os), std::unique_ptr(ce), *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false)); mcStreamer->setUseAssemblerInfoForParsing(true); std::unique_ptr parser( createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); std::unique_ptr tap( target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); if (!tap) { emitError(loc, "assembler initialization error"); return {}; } parser->setTargetParser(*tap); parser->Run(false); return std::make_unique>(std::move(result)); } std::unique_ptr> SerializeToHsacoPass::createHsaco(const SmallVectorImpl &isaBinary) { auto loc = getOperation().getLoc(); // Save the ISA binary to a temp file. int tempIsaBinaryFd = -1; SmallString<128> tempIsaBinaryFilename; if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, tempIsaBinaryFilename)) { emitError(loc, "temporary file for ISA binary creation error"); return {}; } llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); tempIsaBinaryOs.close(); // Create a temp file for HSA code object. int tempHsacoFD = -1; SmallString<128> tempHsacoFilename; if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD, tempHsacoFilename)) { emitError(loc, "temporary file for HSA code object creation error"); return {}; } llvm::FileRemover cleanupHsaco(tempHsacoFilename); { static std::mutex mutex; const std::lock_guard lock(mutex); // Invoke lld. Expect a true return value from lld. if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(), "-o", tempHsacoFilename.c_str()}, /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) { emitError(loc, "lld invocation error"); return {}; } } // Load the HSA code object. auto hsacoFile = openInputFile(tempHsacoFilename); if (!hsacoFile) { emitError(loc, "read HSA code object from temp file error"); return {}; } StringRef buffer = hsacoFile->getBuffer(); return std::make_unique>(buffer.begin(), buffer.end()); } std::unique_ptr> SerializeToHsacoPass::serializeISA(const std::string &isa) { auto isaBinary = assembleIsa(isa); if (!isaBinary) return {}; return createHsaco(*isaBinary); } // Register pass to serialize GPU kernel functions to a HSACO binary annotation. void mlir::registerGpuSerializeToHsacoPass() { PassRegistration registerSerializeToHSACO( [] { // Initialize LLVM AMDGPU backend. LLVMInitializeAMDGPUAsmParser(); LLVMInitializeAMDGPUAsmPrinter(); LLVMInitializeAMDGPUTarget(); LLVMInitializeAMDGPUTargetInfo(); LLVMInitializeAMDGPUTargetMC(); return std::make_unique("amdgcn-amd-amdhsa", "", "", 2); }); } #else // MLIR_GPU_TO_HSACO_PASS_ENABLE void mlir::registerGpuSerializeToHsacoPass() {} #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE