19cf995beSStanislav Mekhanoshin //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
29cf995beSStanislav Mekhanoshin //
39cf995beSStanislav Mekhanoshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49cf995beSStanislav Mekhanoshin // See https://llvm.org/LICENSE.txt for license information.
59cf995beSStanislav Mekhanoshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69cf995beSStanislav Mekhanoshin //
79cf995beSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
89cf995beSStanislav Mekhanoshin //
99cf995beSStanislav Mekhanoshin /// \file This pass recursively promotes generic pointer arguments of a kernel
109cf995beSStanislav Mekhanoshin /// into the global address space.
119cf995beSStanislav Mekhanoshin ///
129cf995beSStanislav Mekhanoshin /// The pass walks kernel's pointer arguments, then loads from them. If a loaded
139cf995beSStanislav Mekhanoshin /// value is a pointer and loaded pointer is unmodified in the kernel before the
149cf995beSStanislav Mekhanoshin /// load, then promote loaded pointer to global. Then recursively continue.
159cf995beSStanislav Mekhanoshin //
169cf995beSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
179cf995beSStanislav Mekhanoshin
189cf995beSStanislav Mekhanoshin #include "AMDGPU.h"
19290e5722SStanislav Mekhanoshin #include "Utils/AMDGPUMemoryUtils.h"
209cf995beSStanislav Mekhanoshin #include "llvm/ADT/SmallVector.h"
21290e5722SStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h"
229cf995beSStanislav Mekhanoshin #include "llvm/Analysis/MemorySSA.h"
239cf995beSStanislav Mekhanoshin #include "llvm/IR/IRBuilder.h"
249cf995beSStanislav Mekhanoshin #include "llvm/InitializePasses.h"
259cf995beSStanislav Mekhanoshin
269cf995beSStanislav Mekhanoshin #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
279cf995beSStanislav Mekhanoshin
289cf995beSStanislav Mekhanoshin using namespace llvm;
299cf995beSStanislav Mekhanoshin
309cf995beSStanislav Mekhanoshin namespace {
319cf995beSStanislav Mekhanoshin
329cf995beSStanislav Mekhanoshin class AMDGPUPromoteKernelArguments : public FunctionPass {
339cf995beSStanislav Mekhanoshin MemorySSA *MSSA;
349cf995beSStanislav Mekhanoshin
35290e5722SStanislav Mekhanoshin AliasAnalysis *AA;
36290e5722SStanislav Mekhanoshin
379cf995beSStanislav Mekhanoshin Instruction *ArgCastInsertPt;
389cf995beSStanislav Mekhanoshin
399cf995beSStanislav Mekhanoshin SmallVector<Value *> Ptrs;
409cf995beSStanislav Mekhanoshin
419cf995beSStanislav Mekhanoshin void enqueueUsers(Value *Ptr);
429cf995beSStanislav Mekhanoshin
439cf995beSStanislav Mekhanoshin bool promotePointer(Value *Ptr);
449cf995beSStanislav Mekhanoshin
45b0aa1946SStanislav Mekhanoshin bool promoteLoad(LoadInst *LI);
46b0aa1946SStanislav Mekhanoshin
479cf995beSStanislav Mekhanoshin public:
489cf995beSStanislav Mekhanoshin static char ID;
499cf995beSStanislav Mekhanoshin
AMDGPUPromoteKernelArguments()509cf995beSStanislav Mekhanoshin AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
519cf995beSStanislav Mekhanoshin
52290e5722SStanislav Mekhanoshin bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
539cf995beSStanislav Mekhanoshin
549cf995beSStanislav Mekhanoshin bool runOnFunction(Function &F) override;
559cf995beSStanislav Mekhanoshin
getAnalysisUsage(AnalysisUsage & AU) const569cf995beSStanislav Mekhanoshin void getAnalysisUsage(AnalysisUsage &AU) const override {
57290e5722SStanislav Mekhanoshin AU.addRequired<AAResultsWrapperPass>();
589cf995beSStanislav Mekhanoshin AU.addRequired<MemorySSAWrapperPass>();
599cf995beSStanislav Mekhanoshin AU.setPreservesAll();
609cf995beSStanislav Mekhanoshin }
619cf995beSStanislav Mekhanoshin };
629cf995beSStanislav Mekhanoshin
639cf995beSStanislav Mekhanoshin } // end anonymous namespace
649cf995beSStanislav Mekhanoshin
enqueueUsers(Value * Ptr)659cf995beSStanislav Mekhanoshin void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
669cf995beSStanislav Mekhanoshin SmallVector<User *> PtrUsers(Ptr->users());
679cf995beSStanislav Mekhanoshin
689cf995beSStanislav Mekhanoshin while (!PtrUsers.empty()) {
699cf995beSStanislav Mekhanoshin Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
709cf995beSStanislav Mekhanoshin if (!U)
719cf995beSStanislav Mekhanoshin continue;
729cf995beSStanislav Mekhanoshin
739cf995beSStanislav Mekhanoshin switch (U->getOpcode()) {
749cf995beSStanislav Mekhanoshin default:
759cf995beSStanislav Mekhanoshin break;
769cf995beSStanislav Mekhanoshin case Instruction::Load: {
779cf995beSStanislav Mekhanoshin LoadInst *LD = cast<LoadInst>(U);
78b0aa1946SStanislav Mekhanoshin if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79b0aa1946SStanislav Mekhanoshin !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
809cf995beSStanislav Mekhanoshin Ptrs.push_back(LD);
81b0aa1946SStanislav Mekhanoshin
829cf995beSStanislav Mekhanoshin break;
839cf995beSStanislav Mekhanoshin }
849cf995beSStanislav Mekhanoshin case Instruction::GetElementPtr:
859cf995beSStanislav Mekhanoshin case Instruction::AddrSpaceCast:
869cf995beSStanislav Mekhanoshin case Instruction::BitCast:
879cf995beSStanislav Mekhanoshin if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
889cf995beSStanislav Mekhanoshin PtrUsers.append(U->user_begin(), U->user_end());
899cf995beSStanislav Mekhanoshin break;
909cf995beSStanislav Mekhanoshin }
919cf995beSStanislav Mekhanoshin }
929cf995beSStanislav Mekhanoshin }
939cf995beSStanislav Mekhanoshin
promotePointer(Value * Ptr)949cf995beSStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95b0aa1946SStanislav Mekhanoshin bool Changed = false;
96b0aa1946SStanislav Mekhanoshin
97b0aa1946SStanislav Mekhanoshin LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98b0aa1946SStanislav Mekhanoshin if (LI)
99b0aa1946SStanislav Mekhanoshin Changed |= promoteLoad(LI);
100b0aa1946SStanislav Mekhanoshin
101b0aa1946SStanislav Mekhanoshin PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102b0aa1946SStanislav Mekhanoshin if (!PT)
103b0aa1946SStanislav Mekhanoshin return Changed;
104b0aa1946SStanislav Mekhanoshin
105b0aa1946SStanislav Mekhanoshin if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106b0aa1946SStanislav Mekhanoshin PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107b0aa1946SStanislav Mekhanoshin PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
1089cf995beSStanislav Mekhanoshin enqueueUsers(Ptr);
1099cf995beSStanislav Mekhanoshin
1109cf995beSStanislav Mekhanoshin if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111b0aa1946SStanislav Mekhanoshin return Changed;
1129cf995beSStanislav Mekhanoshin
113b0aa1946SStanislav Mekhanoshin IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114b0aa1946SStanislav Mekhanoshin : ArgCastInsertPt);
1159cf995beSStanislav Mekhanoshin
1169cf995beSStanislav Mekhanoshin // Cast pointer to global address space and back to flat and let
1179cf995beSStanislav Mekhanoshin // Infer Address Spaces pass to do all necessary rewriting.
1189cf995beSStanislav Mekhanoshin PointerType *NewPT =
1199cf995beSStanislav Mekhanoshin PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
1209cf995beSStanislav Mekhanoshin Value *Cast =
1219cf995beSStanislav Mekhanoshin B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
1229cf995beSStanislav Mekhanoshin Value *CastBack =
1239cf995beSStanislav Mekhanoshin B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
1249cf995beSStanislav Mekhanoshin Ptr->replaceUsesWithIf(CastBack,
1259cf995beSStanislav Mekhanoshin [Cast](Use &U) { return U.getUser() != Cast; });
1269cf995beSStanislav Mekhanoshin
1279cf995beSStanislav Mekhanoshin return true;
1289cf995beSStanislav Mekhanoshin }
1299cf995beSStanislav Mekhanoshin
promoteLoad(LoadInst * LI)130b0aa1946SStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131b0aa1946SStanislav Mekhanoshin if (!LI->isSimple())
132b0aa1946SStanislav Mekhanoshin return false;
133b0aa1946SStanislav Mekhanoshin
134*9eabea39SStanislav Mekhanoshin LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135b0aa1946SStanislav Mekhanoshin return true;
136b0aa1946SStanislav Mekhanoshin }
137b0aa1946SStanislav Mekhanoshin
1389cf995beSStanislav Mekhanoshin // skip allocas
getInsertPt(BasicBlock & BB)1399cf995beSStanislav Mekhanoshin static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
1409cf995beSStanislav Mekhanoshin BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
1419cf995beSStanislav Mekhanoshin for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
1429cf995beSStanislav Mekhanoshin AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
1439cf995beSStanislav Mekhanoshin
1449cf995beSStanislav Mekhanoshin // If this is a dynamic alloca, the value may depend on the loaded kernargs,
1459cf995beSStanislav Mekhanoshin // so loads will need to be inserted before it.
1469cf995beSStanislav Mekhanoshin if (!AI || !AI->isStaticAlloca())
1479cf995beSStanislav Mekhanoshin break;
1489cf995beSStanislav Mekhanoshin }
1499cf995beSStanislav Mekhanoshin
1509cf995beSStanislav Mekhanoshin return InsPt;
1519cf995beSStanislav Mekhanoshin }
1529cf995beSStanislav Mekhanoshin
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)153290e5722SStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154290e5722SStanislav Mekhanoshin AliasAnalysis &AA) {
1559cf995beSStanislav Mekhanoshin if (skipFunction(F))
1569cf995beSStanislav Mekhanoshin return false;
1579cf995beSStanislav Mekhanoshin
1589cf995beSStanislav Mekhanoshin CallingConv::ID CC = F.getCallingConv();
1599cf995beSStanislav Mekhanoshin if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
1609cf995beSStanislav Mekhanoshin return false;
1619cf995beSStanislav Mekhanoshin
1629cf995beSStanislav Mekhanoshin ArgCastInsertPt = &*getInsertPt(*F.begin());
1639cf995beSStanislav Mekhanoshin this->MSSA = &MSSA;
164290e5722SStanislav Mekhanoshin this->AA = &AA;
1659cf995beSStanislav Mekhanoshin
1669cf995beSStanislav Mekhanoshin for (Argument &Arg : F.args()) {
1679cf995beSStanislav Mekhanoshin if (Arg.use_empty())
1689cf995beSStanislav Mekhanoshin continue;
1699cf995beSStanislav Mekhanoshin
1709cf995beSStanislav Mekhanoshin PointerType *PT = dyn_cast<PointerType>(Arg.getType());
1719cf995beSStanislav Mekhanoshin if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
1729cf995beSStanislav Mekhanoshin PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
1739cf995beSStanislav Mekhanoshin PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
1749cf995beSStanislav Mekhanoshin continue;
1759cf995beSStanislav Mekhanoshin
1769cf995beSStanislav Mekhanoshin Ptrs.push_back(&Arg);
1779cf995beSStanislav Mekhanoshin }
1789cf995beSStanislav Mekhanoshin
1799cf995beSStanislav Mekhanoshin bool Changed = false;
1809cf995beSStanislav Mekhanoshin while (!Ptrs.empty()) {
1819cf995beSStanislav Mekhanoshin Value *Ptr = Ptrs.pop_back_val();
1829cf995beSStanislav Mekhanoshin Changed |= promotePointer(Ptr);
1839cf995beSStanislav Mekhanoshin }
1849cf995beSStanislav Mekhanoshin
1859cf995beSStanislav Mekhanoshin return Changed;
1869cf995beSStanislav Mekhanoshin }
1879cf995beSStanislav Mekhanoshin
runOnFunction(Function & F)1889cf995beSStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
1899cf995beSStanislav Mekhanoshin MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190290e5722SStanislav Mekhanoshin AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191290e5722SStanislav Mekhanoshin return run(F, MSSA, AA);
1929cf995beSStanislav Mekhanoshin }
1939cf995beSStanislav Mekhanoshin
1949cf995beSStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
1959cf995beSStanislav Mekhanoshin "AMDGPU Promote Kernel Arguments", false, false)
196290e5722SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1979cf995beSStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1989cf995beSStanislav Mekhanoshin INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
1999cf995beSStanislav Mekhanoshin "AMDGPU Promote Kernel Arguments", false, false)
2009cf995beSStanislav Mekhanoshin
2019cf995beSStanislav Mekhanoshin char AMDGPUPromoteKernelArguments::ID = 0;
2029cf995beSStanislav Mekhanoshin
createAMDGPUPromoteKernelArgumentsPass()2039cf995beSStanislav Mekhanoshin FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
2049cf995beSStanislav Mekhanoshin return new AMDGPUPromoteKernelArguments();
2059cf995beSStanislav Mekhanoshin }
2069cf995beSStanislav Mekhanoshin
2079cf995beSStanislav Mekhanoshin PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)2089cf995beSStanislav Mekhanoshin AMDGPUPromoteKernelArgumentsPass::run(Function &F,
2099cf995beSStanislav Mekhanoshin FunctionAnalysisManager &AM) {
2109cf995beSStanislav Mekhanoshin MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
211290e5722SStanislav Mekhanoshin AliasAnalysis &AA = AM.getResult<AAManager>(F);
212290e5722SStanislav Mekhanoshin if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
2139cf995beSStanislav Mekhanoshin PreservedAnalyses PA;
2149cf995beSStanislav Mekhanoshin PA.preserveSet<CFGAnalyses>();
2159cf995beSStanislav Mekhanoshin PA.preserve<MemorySSAAnalysis>();
2169cf995beSStanislav Mekhanoshin return PA;
2179cf995beSStanislav Mekhanoshin }
2189cf995beSStanislav Mekhanoshin return PreservedAnalyses::all();
2199cf995beSStanislav Mekhanoshin }
220