//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and the loads from them. If a
/// loaded value is a pointer and that pointer is not modified in the kernel
/// before the load, the loaded pointer is promoted to the global address
/// space. The process then continues recursively on the promoted pointers.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

  bool promoteLoad(LoadInst *LI);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

// Walk the users of Ptr through GEPs and casts and enqueue loads whose source
// memory is not clobbered before the load.
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
          !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);

      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  bool Changed = false;

  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  if (LI)
    Changed |= promoteLoad(LI);

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;

  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);

  // Cast the pointer to the global address space and back to flat, and let the
  // Infer Address Spaces pass do all the necessary rewriting.
  PointerType *NewPT =
      PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (!LI->isSimple())
    return false;

  Value *Ptr = LI->getPointerOperand();

  // Strip casts we have created earlier.
  Value *OrigPtr = Ptr;
  PointerType *PT;
  for (;;) {
    PT = cast<PointerType>(OrigPtr->getType());
    if (PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
      return false;
    auto *P = dyn_cast<AddrSpaceCastInst>(OrigPtr);
    if (!P)
      break;
    auto *NewPtr = P->getPointerOperand();
    if (!cast<PointerType>(NewPtr->getType())->hasSameElementTypeAs(PT))
      break;
    OrigPtr = NewPtr;
  }

  IRBuilder<> B(LI);

  PointerType *NewPT =
      PointerType::getWithSamePointeeType(PT, AMDGPUAS::CONSTANT_ADDRESS);
  Value *Cast = B.CreateAddrSpaceCast(OrigPtr, NewPT,
                                      Twine(OrigPtr->getName(), ".const"));
  LI->replaceUsesOfWith(Ptr, Cast);
  return true;
}

// skip allocas
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded
    // kernargs, so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}