19cf995beSStanislav Mekhanoshin //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
29cf995beSStanislav Mekhanoshin //
39cf995beSStanislav Mekhanoshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49cf995beSStanislav Mekhanoshin // See https://llvm.org/LICENSE.txt for license information.
59cf995beSStanislav Mekhanoshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69cf995beSStanislav Mekhanoshin //
79cf995beSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
89cf995beSStanislav Mekhanoshin //
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and then the loads from them.
/// If a loaded value is a pointer and the loaded pointer is not modified in the
/// kernel before the load, the loaded pointer is promoted to global as well.
/// The process then continues recursively.
159cf995beSStanislav Mekhanoshin //
169cf995beSStanislav Mekhanoshin //===----------------------------------------------------------------------===//
179cf995beSStanislav Mekhanoshin 
189cf995beSStanislav Mekhanoshin #include "AMDGPU.h"
19290e5722SStanislav Mekhanoshin #include "Utils/AMDGPUMemoryUtils.h"
209cf995beSStanislav Mekhanoshin #include "llvm/ADT/SmallVector.h"
21290e5722SStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h"
229cf995beSStanislav Mekhanoshin #include "llvm/Analysis/MemorySSA.h"
239cf995beSStanislav Mekhanoshin #include "llvm/IR/IRBuilder.h"
249cf995beSStanislav Mekhanoshin #include "llvm/InitializePasses.h"
259cf995beSStanislav Mekhanoshin 
269cf995beSStanislav Mekhanoshin #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
279cf995beSStanislav Mekhanoshin 
289cf995beSStanislav Mekhanoshin using namespace llvm;
299cf995beSStanislav Mekhanoshin 
309cf995beSStanislav Mekhanoshin namespace {
319cf995beSStanislav Mekhanoshin 
329cf995beSStanislav Mekhanoshin class AMDGPUPromoteKernelArguments : public FunctionPass {
339cf995beSStanislav Mekhanoshin   MemorySSA *MSSA;
349cf995beSStanislav Mekhanoshin 
35290e5722SStanislav Mekhanoshin   AliasAnalysis *AA;
36290e5722SStanislav Mekhanoshin 
379cf995beSStanislav Mekhanoshin   Instruction *ArgCastInsertPt;
389cf995beSStanislav Mekhanoshin 
399cf995beSStanislav Mekhanoshin   SmallVector<Value *> Ptrs;
409cf995beSStanislav Mekhanoshin 
419cf995beSStanislav Mekhanoshin   void enqueueUsers(Value *Ptr);
429cf995beSStanislav Mekhanoshin 
439cf995beSStanislav Mekhanoshin   bool promotePointer(Value *Ptr);
449cf995beSStanislav Mekhanoshin 
45b0aa1946SStanislav Mekhanoshin   bool promoteLoad(LoadInst *LI);
46b0aa1946SStanislav Mekhanoshin 
479cf995beSStanislav Mekhanoshin public:
489cf995beSStanislav Mekhanoshin   static char ID;
499cf995beSStanislav Mekhanoshin 
AMDGPUPromoteKernelArguments()509cf995beSStanislav Mekhanoshin   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
519cf995beSStanislav Mekhanoshin 
52290e5722SStanislav Mekhanoshin   bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
539cf995beSStanislav Mekhanoshin 
549cf995beSStanislav Mekhanoshin   bool runOnFunction(Function &F) override;
559cf995beSStanislav Mekhanoshin 
getAnalysisUsage(AnalysisUsage & AU) const569cf995beSStanislav Mekhanoshin   void getAnalysisUsage(AnalysisUsage &AU) const override {
57290e5722SStanislav Mekhanoshin     AU.addRequired<AAResultsWrapperPass>();
589cf995beSStanislav Mekhanoshin     AU.addRequired<MemorySSAWrapperPass>();
599cf995beSStanislav Mekhanoshin     AU.setPreservesAll();
609cf995beSStanislav Mekhanoshin   }
619cf995beSStanislav Mekhanoshin };
629cf995beSStanislav Mekhanoshin 
639cf995beSStanislav Mekhanoshin } // end anonymous namespace
649cf995beSStanislav Mekhanoshin 
enqueueUsers(Value * Ptr)659cf995beSStanislav Mekhanoshin void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
669cf995beSStanislav Mekhanoshin   SmallVector<User *> PtrUsers(Ptr->users());
679cf995beSStanislav Mekhanoshin 
689cf995beSStanislav Mekhanoshin   while (!PtrUsers.empty()) {
699cf995beSStanislav Mekhanoshin     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
709cf995beSStanislav Mekhanoshin     if (!U)
719cf995beSStanislav Mekhanoshin       continue;
729cf995beSStanislav Mekhanoshin 
739cf995beSStanislav Mekhanoshin     switch (U->getOpcode()) {
749cf995beSStanislav Mekhanoshin     default:
759cf995beSStanislav Mekhanoshin       break;
769cf995beSStanislav Mekhanoshin     case Instruction::Load: {
779cf995beSStanislav Mekhanoshin       LoadInst *LD = cast<LoadInst>(U);
78b0aa1946SStanislav Mekhanoshin       if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79b0aa1946SStanislav Mekhanoshin           !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
809cf995beSStanislav Mekhanoshin         Ptrs.push_back(LD);
81b0aa1946SStanislav Mekhanoshin 
829cf995beSStanislav Mekhanoshin       break;
839cf995beSStanislav Mekhanoshin     }
849cf995beSStanislav Mekhanoshin     case Instruction::GetElementPtr:
859cf995beSStanislav Mekhanoshin     case Instruction::AddrSpaceCast:
869cf995beSStanislav Mekhanoshin     case Instruction::BitCast:
879cf995beSStanislav Mekhanoshin       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
889cf995beSStanislav Mekhanoshin         PtrUsers.append(U->user_begin(), U->user_end());
899cf995beSStanislav Mekhanoshin       break;
909cf995beSStanislav Mekhanoshin     }
919cf995beSStanislav Mekhanoshin   }
929cf995beSStanislav Mekhanoshin }
939cf995beSStanislav Mekhanoshin 
promotePointer(Value * Ptr)949cf995beSStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95b0aa1946SStanislav Mekhanoshin   bool Changed = false;
96b0aa1946SStanislav Mekhanoshin 
97b0aa1946SStanislav Mekhanoshin   LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98b0aa1946SStanislav Mekhanoshin   if (LI)
99b0aa1946SStanislav Mekhanoshin     Changed |= promoteLoad(LI);
100b0aa1946SStanislav Mekhanoshin 
101b0aa1946SStanislav Mekhanoshin   PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102b0aa1946SStanislav Mekhanoshin   if (!PT)
103b0aa1946SStanislav Mekhanoshin     return Changed;
104b0aa1946SStanislav Mekhanoshin 
105b0aa1946SStanislav Mekhanoshin   if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106b0aa1946SStanislav Mekhanoshin       PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107b0aa1946SStanislav Mekhanoshin       PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
1089cf995beSStanislav Mekhanoshin     enqueueUsers(Ptr);
1099cf995beSStanislav Mekhanoshin 
1109cf995beSStanislav Mekhanoshin   if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111b0aa1946SStanislav Mekhanoshin     return Changed;
1129cf995beSStanislav Mekhanoshin 
113b0aa1946SStanislav Mekhanoshin   IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114b0aa1946SStanislav Mekhanoshin                    : ArgCastInsertPt);
1159cf995beSStanislav Mekhanoshin 
1169cf995beSStanislav Mekhanoshin   // Cast pointer to global address space and back to flat and let
1179cf995beSStanislav Mekhanoshin   // Infer Address Spaces pass to do all necessary rewriting.
1189cf995beSStanislav Mekhanoshin   PointerType *NewPT =
1199cf995beSStanislav Mekhanoshin       PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
1209cf995beSStanislav Mekhanoshin   Value *Cast =
1219cf995beSStanislav Mekhanoshin       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
1229cf995beSStanislav Mekhanoshin   Value *CastBack =
1239cf995beSStanislav Mekhanoshin       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
1249cf995beSStanislav Mekhanoshin   Ptr->replaceUsesWithIf(CastBack,
1259cf995beSStanislav Mekhanoshin                          [Cast](Use &U) { return U.getUser() != Cast; });
1269cf995beSStanislav Mekhanoshin 
1279cf995beSStanislav Mekhanoshin   return true;
1289cf995beSStanislav Mekhanoshin }
1299cf995beSStanislav Mekhanoshin 
promoteLoad(LoadInst * LI)130b0aa1946SStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131b0aa1946SStanislav Mekhanoshin   if (!LI->isSimple())
132b0aa1946SStanislav Mekhanoshin     return false;
133b0aa1946SStanislav Mekhanoshin 
134*9eabea39SStanislav Mekhanoshin   LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135b0aa1946SStanislav Mekhanoshin   return true;
136b0aa1946SStanislav Mekhanoshin }
137b0aa1946SStanislav Mekhanoshin 
1389cf995beSStanislav Mekhanoshin // skip allocas
getInsertPt(BasicBlock & BB)1399cf995beSStanislav Mekhanoshin static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
1409cf995beSStanislav Mekhanoshin   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
1419cf995beSStanislav Mekhanoshin   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
1429cf995beSStanislav Mekhanoshin     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
1439cf995beSStanislav Mekhanoshin 
1449cf995beSStanislav Mekhanoshin     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
1459cf995beSStanislav Mekhanoshin     // so loads will need to be inserted before it.
1469cf995beSStanislav Mekhanoshin     if (!AI || !AI->isStaticAlloca())
1479cf995beSStanislav Mekhanoshin       break;
1489cf995beSStanislav Mekhanoshin   }
1499cf995beSStanislav Mekhanoshin 
1509cf995beSStanislav Mekhanoshin   return InsPt;
1519cf995beSStanislav Mekhanoshin }
1529cf995beSStanislav Mekhanoshin 
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)153290e5722SStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154290e5722SStanislav Mekhanoshin                                        AliasAnalysis &AA) {
1559cf995beSStanislav Mekhanoshin   if (skipFunction(F))
1569cf995beSStanislav Mekhanoshin     return false;
1579cf995beSStanislav Mekhanoshin 
1589cf995beSStanislav Mekhanoshin   CallingConv::ID CC = F.getCallingConv();
1599cf995beSStanislav Mekhanoshin   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
1609cf995beSStanislav Mekhanoshin     return false;
1619cf995beSStanislav Mekhanoshin 
1629cf995beSStanislav Mekhanoshin   ArgCastInsertPt = &*getInsertPt(*F.begin());
1639cf995beSStanislav Mekhanoshin   this->MSSA = &MSSA;
164290e5722SStanislav Mekhanoshin   this->AA = &AA;
1659cf995beSStanislav Mekhanoshin 
1669cf995beSStanislav Mekhanoshin   for (Argument &Arg : F.args()) {
1679cf995beSStanislav Mekhanoshin     if (Arg.use_empty())
1689cf995beSStanislav Mekhanoshin       continue;
1699cf995beSStanislav Mekhanoshin 
1709cf995beSStanislav Mekhanoshin     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
1719cf995beSStanislav Mekhanoshin     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
1729cf995beSStanislav Mekhanoshin                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
1739cf995beSStanislav Mekhanoshin                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
1749cf995beSStanislav Mekhanoshin       continue;
1759cf995beSStanislav Mekhanoshin 
1769cf995beSStanislav Mekhanoshin     Ptrs.push_back(&Arg);
1779cf995beSStanislav Mekhanoshin   }
1789cf995beSStanislav Mekhanoshin 
1799cf995beSStanislav Mekhanoshin   bool Changed = false;
1809cf995beSStanislav Mekhanoshin   while (!Ptrs.empty()) {
1819cf995beSStanislav Mekhanoshin     Value *Ptr = Ptrs.pop_back_val();
1829cf995beSStanislav Mekhanoshin     Changed |= promotePointer(Ptr);
1839cf995beSStanislav Mekhanoshin   }
1849cf995beSStanislav Mekhanoshin 
1859cf995beSStanislav Mekhanoshin   return Changed;
1869cf995beSStanislav Mekhanoshin }
1879cf995beSStanislav Mekhanoshin 
runOnFunction(Function & F)1889cf995beSStanislav Mekhanoshin bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
1899cf995beSStanislav Mekhanoshin   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190290e5722SStanislav Mekhanoshin   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191290e5722SStanislav Mekhanoshin   return run(F, MSSA, AA);
1929cf995beSStanislav Mekhanoshin }
1939cf995beSStanislav Mekhanoshin 
1949cf995beSStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
1959cf995beSStanislav Mekhanoshin                       "AMDGPU Promote Kernel Arguments", false, false)
196290e5722SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1979cf995beSStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1989cf995beSStanislav Mekhanoshin INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
1999cf995beSStanislav Mekhanoshin                     "AMDGPU Promote Kernel Arguments", false, false)
2009cf995beSStanislav Mekhanoshin 
2019cf995beSStanislav Mekhanoshin char AMDGPUPromoteKernelArguments::ID = 0;
2029cf995beSStanislav Mekhanoshin 
createAMDGPUPromoteKernelArgumentsPass()2039cf995beSStanislav Mekhanoshin FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
2049cf995beSStanislav Mekhanoshin   return new AMDGPUPromoteKernelArguments();
2059cf995beSStanislav Mekhanoshin }
2069cf995beSStanislav Mekhanoshin 
2079cf995beSStanislav Mekhanoshin PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)2089cf995beSStanislav Mekhanoshin AMDGPUPromoteKernelArgumentsPass::run(Function &F,
2099cf995beSStanislav Mekhanoshin                                       FunctionAnalysisManager &AM) {
2109cf995beSStanislav Mekhanoshin   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
211290e5722SStanislav Mekhanoshin   AliasAnalysis &AA = AM.getResult<AAManager>(F);
212290e5722SStanislav Mekhanoshin   if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
2139cf995beSStanislav Mekhanoshin     PreservedAnalyses PA;
2149cf995beSStanislav Mekhanoshin     PA.preserveSet<CFGAnalyses>();
2159cf995beSStanislav Mekhanoshin     PA.preserve<MemorySSAAnalysis>();
2169cf995beSStanislav Mekhanoshin     return PA;
2179cf995beSStanislav Mekhanoshin   }
2189cf995beSStanislav Mekhanoshin   return PreservedAnalyses::all();
2199cf995beSStanislav Mekhanoshin }
220