1 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass recursively promotes generic pointer arguments of a kernel
10 /// into the global address space.
11 ///
/// The pass walks the kernel's pointer arguments and the loads from them. If
/// a loaded value is itself a pointer, and the location it is loaded from is
/// not clobbered in the kernel before the load, the loaded pointer is
/// promoted to the global address space. The walk then continues recursively
/// with the promoted pointers.
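///
/// For example, given (illustrative, typed-pointer IR; on AMDGPU
/// addrspace(0) is flat, addrspace(1) is global and addrspace(4) is
/// constant):
///
///   define amdgpu_kernel void @kern(float** %arg) {
///     %p = load float*, float** %arg
///     store float 0.0, float* %p
///     ret void
///   }
///
/// the pass produces, before InferAddressSpaces folds the cast pairs away:
///
///   define amdgpu_kernel void @kern(float** %arg) {
///     %arg.global = addrspacecast float** %arg to float* addrspace(1)*
///     %arg.flat = addrspacecast float* addrspace(1)* %arg.global to float**
///     %arg.const = addrspacecast float** %arg to float* addrspace(4)*
///     %p = load float*, float* addrspace(4)* %arg.const
///     %p.global = addrspacecast float* %p to float addrspace(1)*
///     %p.flat = addrspacecast float addrspace(1)* %p.global to float*
///     store float 0.0, float* %p.flat
///     ret void
///   }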
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "Utils/AMDGPUMemoryUtils.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/MemorySSA.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/InitializePasses.h"
25 
26 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27 
28 using namespace llvm;
29 
30 namespace {
31 
32 class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  /// Insertion point in the entry block, past the static allocas, where the
  /// casts of promoted arguments are created.
  Instruction *ArgCastInsertPt;

  /// Worklist of promotion candidates: kernel pointer arguments and the
  /// loaded pointers collected by enqueueUsers().
  SmallVector<Value *> Ptrs;
40 
  /// Walk the users of \p Ptr through GEPs and casts and push promotable
  /// loads of the unmodified pointer onto the Ptrs worklist.
  void enqueueUsers(Value *Ptr);

  /// Promote a single candidate pointer: rewrite its defining load (if any)
  /// via promoteLoad(), enqueue the loads through it, and cast flat pointers
  /// to the global address space.
  bool promotePointer(Value *Ptr);

  /// Try to rewrite \p LI to load through a constant address space cast of
  /// its pointer operand.
  bool promoteLoad(LoadInst *LI);
46 
47 public:
48   static char ID;
49 
50   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51 
52   bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53 
54   bool runOnFunction(Function &F) override;
55 
56   void getAnalysisUsage(AnalysisUsage &AU) const override {
57     AU.addRequired<AAResultsWrapperPass>();
58     AU.addRequired<MemorySSAWrapperPass>();
59     AU.setPreservesAll();
60   }
61 };
62 
63 } // end anonymous namespace
64 
65 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66   SmallVector<User *> PtrUsers(Ptr->users());
67 
68   while (!PtrUsers.empty()) {
69     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70     if (!U)
71       continue;
72 
73     switch (U->getOpcode()) {
74     default:
75       break;
76     case Instruction::Load: {
77       LoadInst *LD = cast<LoadInst>(U);
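      // A load is promotable only if its address is the unmodified pointer
      // (looking through inbounds offsets) and MemorySSA and AA find no
      // clobber of it between the kernel entry and the load.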
78       if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79           !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80         Ptrs.push_back(LD);
81 
82       break;
83     }
84     case Instruction::GetElementPtr:
85     case Instruction::AddrSpaceCast:
86     case Instruction::BitCast:
87       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88         PtrUsers.append(U->user_begin(), U->user_end());
89       break;
90     }
91   }
92 }
93 
94 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95   bool Changed = false;
96 
97   LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98   if (LI)
99     Changed |= promoteLoad(LI);
100 
101   PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102   if (!PT)
103     return Changed;
104 
  // A pointer in any of these address spaces may point at other promotable
  // pointers; queue the loads through it.
  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);
109 
  // Only flat pointers themselves need the rewrite below; pointers already
  // in a specific address space are done.
  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;
112 
  // Insert the cast pair right after the load that produced the pointer, or
  // at the entry-block insertion point when promoting an argument.
  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);
115 
  // Cast the pointer to the global address space and back to flat, and let
  // the InferAddressSpaces pass do all the necessary rewriting.
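  // E.g. for a flat %ptr of type float** this creates (illustrative):
  //   %ptr.global = addrspacecast float** %ptr to float* addrspace(1)*
  //   %ptr.flat   = addrspacecast float* addrspace(1)* %ptr.global to float**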
118   PointerType *NewPT =
119       PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
120   Value *Cast =
121       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122   Value *CastBack =
123       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124   Ptr->replaceUsesWithIf(CastBack,
125                          [Cast](Use &U) { return U.getUser() != Cast; });
126 
127   return true;
128 }
129 
130 bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131   if (!LI->isSimple())
132     return false;
133 
134   Value *Ptr = LI->getPointerOperand();
135 
136   // Strip casts we have created earlier.
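  // E.g. with the illustrative names used above, walk %arg.flat back through
  // %arg.global to %arg, so the constant cast below starts from the original
  // pointer rather than from one of our own casts.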
137   Value *OrigPtr = Ptr;
138   PointerType *PT;
  while (true) {
140     PT = cast<PointerType>(OrigPtr->getType());
141     if (PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
142       return false;
143     auto *P = dyn_cast<AddrSpaceCastInst>(OrigPtr);
144     if (!P)
145       break;
146     auto *NewPtr = P->getPointerOperand();
147     if (!cast<PointerType>(NewPtr->getType())->hasSameElementTypeAs(PT))
148       break;
149     OrigPtr = NewPtr;
150   }
151 
152   IRBuilder<> B(LI);
153 
154   PointerType *NewPT =
155       PointerType::getWithSamePointeeType(PT, AMDGPUAS::CONSTANT_ADDRESS);
156   Value *Cast = B.CreateAddrSpaceCast(OrigPtr, NewPT,
157                                       Twine(OrigPtr->getName(), ".const"));
158   LI->replaceUsesOfWith(Ptr, Cast);
159   return true;
160 }
161 
// Skip the static allocas at the start of the entry block when choosing the
// insertion point for argument casts.
163 static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
164   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
165   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
166     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
167 
168     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
169     // so loads will need to be inserted before it.
170     if (!AI || !AI->isStaticAlloca())
171       break;
172   }
173 
174   return InsPt;
175 }
176 
177 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
178                                        AliasAnalysis &AA) {
182   CallingConv::ID CC = F.getCallingConv();
183   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
184     return false;
185 
186   ArgCastInsertPt = &*getInsertPt(*F.begin());
187   this->MSSA = &MSSA;
188   this->AA = &AA;
189 
  // Seed the worklist with pointer arguments in promotable address spaces.
  for (Argument &Arg : F.args()) {
191     if (Arg.use_empty())
192       continue;
193 
194     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
195     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
196                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
197                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
198       continue;
199 
200     Ptrs.push_back(&Arg);
201   }
202 
203   bool Changed = false;
204   while (!Ptrs.empty()) {
205     Value *Ptr = Ptrs.pop_back_val();
206     Changed |= promotePointer(Ptr);
207   }
208 
209   return Changed;
210 }
211 
bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  // skipFunction() is a legacy pass manager facility; keep it here rather
  // than in run(), which is shared with the new pass manager.
  if (skipFunction(F))
    return false;

  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}
217 
218 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
219                       "AMDGPU Promote Kernel Arguments", false, false)
220 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
221 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
222 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
223                     "AMDGPU Promote Kernel Arguments", false, false)
224 
225 char AMDGPUPromoteKernelArguments::ID = 0;
226 
227 FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
228   return new AMDGPUPromoteKernelArguments();
229 }
230 
231 PreservedAnalyses
232 AMDGPUPromoteKernelArgumentsPass::run(Function &F,
233                                       FunctionAnalysisManager &AM) {
234   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
235   AliasAnalysis &AA = AM.getResult<AAManager>(F);
236   if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
237     PreservedAnalyses PA;
238     PA.preserveSet<CFGAnalyses>();
239     PA.preserve<MemorySSAAnalysis>();
240     return PA;
241   }
242   return PreservedAnalyses::all();
243 }
244