//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and then the loads from them.
/// If a loaded value is a pointer and the loaded pointer is unmodified in the
/// kernel before the load, the loaded pointer is promoted to global. The
/// process then continues recursively with the newly promoted pointer.
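///
/// For example, in a kernel such as the following (a sketch; the kernel name
/// and value names are illustrative, assuming opaque pointers):
///
///   define amdgpu_kernel void @kern(ptr %arg) {
///     %p = load ptr, ptr %arg
///     %v = load i32, ptr %p
///     ...
///   }
///
/// both %arg and %p are flat pointers the pass can promote; %p only if the
/// memory it is loaded from is not clobbered between the start of the kernel
/// and the load.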
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
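  // Visit Ptr's users transitively: GEPs and casts over Ptr push their own
  // users onto the worklist, while pointers loaded from Ptr become new
  // promotion candidates.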
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
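    // A pointer loaded from Ptr (possibly through in-bounds offsets) becomes a
    // new promotion candidate if it lives in the flat, global or constant
    // address space and the loaded memory is not clobbered between the start
    // of the kernel and the load.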
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      PointerType *PT = dyn_cast<PointerType>(LD->getType());
      if (!PT ||
          (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
          LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
        break;
      // TODO: This load probably can be promoted to constant address space.
      if (!AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);
      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  enqueueUsers(Ptr);

  PointerType *PT = cast<PointerType>(Ptr->getType());
  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return false;

  bool IsArg = isa<Argument>(Ptr);
  IRBuilder<> B(IsArg ? ArgCastInsertPt
                      : &*std::next(cast<Instruction>(Ptr)->getIterator()));

  // Cast the pointer to the global address space and back to flat, and let
  // the InferAddressSpaces pass do all the necessary rewriting.
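  // For a promoted pointer %p this emits, as a sketch (assuming opaque
  // pointers):
  //   %p.global = addrspacecast ptr %p to ptr addrspace(1)
  //   %p.flat = addrspacecast ptr addrspace(1) %p.global to ptr
  // and rewrites all other uses of %p to use %p.flat.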
  PointerType *NewPT =
      PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

// Find the first insertion point in BB that is past any static allocas at the
// start of the block.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, its size may depend on values loaded from
    // the kernargs, so the new instructions will need to be inserted before
    // it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

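  // Seed the worklist with the kernel's flat, global and constant pointer
  // arguments that have uses.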
  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}