//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues : public FunctionPass,
                                    public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemorySSA *MSSA;
  AliasAnalysis *AA;
  bool isEntryFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() : FunctionPass(ID) {}
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst *Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}

static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
    Instruction *DefInst = Def->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');

    if (isa<FenceInst>(DefInst))
      return false;

    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::amdgcn_s_barrier:
      case Intrinsic::amdgcn_wave_barrier:
        return false;
      default:
        break;
      }
    }

    // Ignore atomics that do not alias the original load; from MemorySSA's
    // point of view any atomic is a universal MemoryDef, just like a fence.
    const auto checkNoAlias = [this, Load](auto I) -> bool {
      return I && AA->isNoAlias(I->getPointerOperand(),
                                Load->getPointerOperand());
    };

    if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
        checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
      return false;

    return true;
  };

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access: it is either
  // live-on-entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // MemoryPhi case, add all incoming defs to the worklist and keep walking
  // up, checking every definition of this memory location until the root.
  // If all defs are exhausted and we reach the entry state, the load is not
  // clobbered. Along the way, ignore barriers and fences, which MemorySSA
  // treats as clobbers even though they do not actually write memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      if (isReallyAClobber(Def)) {
        LLVM_DEBUG(dbgs() << "  -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "  -> no clobber\n");
  return false;
}

void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (DA->isUniform(&I))
    setUniformMetadata(&I);
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
  if (PtrI)
    setUniformMetadata(PtrI);

  // We only track memory up to the function boundary and cannot go beyond it
  // because of FunctionPass restrictions, so we can only prove that memory is
  // not clobbered for loads whose memory is live-in to an entry point.
  if (!isEntryFunc)
    return;
  bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  bool NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
  if (NotClobbered)
    setNoClobberMetadata(&I);
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());

  visit(F);
  return true;
}

FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}
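
// A minimal illustrative sketch (assumed, not taken from the LLVM sources or
// test suite) of the effect of this pass: in an entry function, a load
// through a uniform global pointer gets its pointer-producing instruction
// tagged !amdgpu.uniform, and the load itself additionally gets
// !amdgpu.noclobber when the MemorySSA walk above finds no clobber. The
// value names (%ptr, %gep, %v) are hypothetical.
//
//   ; before this pass:
//   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
//   %v = load i32, i32 addrspace(1)* %gep
//
//   ; after this pass:
//   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4, !amdgpu.uniform !0
//   %v = load i32, i32 addrspace(1)* %gep, !amdgpu.noclobber !0
//
//   !0 = !{}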