//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and the loads from them. If a
/// loaded value is a pointer, and the loaded pointer is unmodified in the
/// kernel before the load, the loaded pointer is promoted to global as well.
/// The process then continues recursively.
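///
/// For illustration, a minimal sketch with invented names (typed pointers are
/// shown; the kernel below is an assumption made up for the example, not taken
/// from a test):
///
///   define amdgpu_kernel void @kern(float** %arg) {
///     %p = load float*, float** %arg
///     store float 0.0, float* %p
///     ret void
///   }
///
/// Assuming %arg is not clobbered before the load, the pass casts %arg to
/// global and back to flat at the function entry and redirects its uses:
///
///   %arg.global = addrspacecast float** %arg to float* addrspace(1)*
///   %arg.flat = addrspacecast float* addrspace(1)* %arg.global to float**
///   %p = load float*, float** %arg.flat
///
/// The loaded flat pointer %p is then queued and promoted the same way, and
/// the InferAddressSpaces pass is expected to fold the flat round-trips into
/// direct global-address-space accesses.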
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

  SmallVector<Value *> Ptrs;

  /// Walk the users of \p Ptr and push onto Ptrs any loaded pointer values
  /// that are not clobbered between the start of the kernel and the load.
  void enqueueUsers(Value *Ptr);

  /// Enqueue promotable pointers loaded from \p Ptr and, if \p Ptr is a
  /// generic (flat) pointer, promote it to the global address space.
  /// Returns true if the IR was changed.
  bool promotePointer(Value *Ptr);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      PointerType *PT = dyn_cast<PointerType>(LD->getType());
      if (!PT ||
          (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
           PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
          LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
        break;
      // TODO: This load probably can be promoted to constant address space.
      if (!AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);
      break;
    }
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  enqueueUsers(Ptr);

  PointerType *PT = cast<PointerType>(Ptr->getType());
  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return false;

  bool IsArg = isa<Argument>(Ptr);
  IRBuilder<> B(IsArg ? ArgCastInsertPt
                      : &*std::next(cast<Instruction>(Ptr)->getIterator()));

  // Cast the pointer to the global address space and back to flat, and let
  // the InferAddressSpaces pass do all of the necessary rewriting.
  PointerType *NewPT =
      PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

// Find the insertion point in the entry block, skipping past any static
// allocas.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded
    // kernargs, so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}
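
// A minimal sketch of exercising just this pass with opt and the new pass
// manager; the pass name is assumed to match the DEBUG_TYPE string above,
// and input.ll is a placeholder:
//
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-kernel-arguments \
//       -S input.ll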