//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
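///
/// Illustrative only (values and names are made up): a branch whose condition
/// the divergence analysis proves uniform is annotated as
///   br i1 %cc, label %if.then, label %if.end, !amdgpu.uniform !0
/// and for loads from provably unclobbered global memory in entry functions
/// the pointer instruction additionally receives !amdgpu.noclobber.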
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues
    : public FunctionPass,
      public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemorySSA *MSSA;
  DenseMap<Value *, GetElementPtrInst *> noClobberClones;
  bool isEntryFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() : FunctionPass(ID) {}
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst *Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}

static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  // Filter out MemorySSA defs that do not actually write the queried location,
  // such as fences and workgroup/wave barriers.
  const auto isReallyAClobber = [](MemoryDef *Def) -> bool {
    Instruction *DefInst = Def->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');

    if (isa<FenceInst>(DefInst))
      return false;

    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::amdgcn_s_barrier:
      case Intrinsic::amdgcn_wave_barrier:
        return false;
      default:
        break;
      }
    }

    return true;
  };

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it is either the
  // live-on-entry state (nothing to do, the load is not clobbered), a
  // MemoryDef, or a MemoryPhi if several MemoryDefs can define this memory
  // state. In the MemoryPhi case add all of its defs to the WorkList and keep
  // walking up, checking every definition of this memory location until the
  // root is reached. If the defs are exhausted and only the entry state was
  // seen, there is no clobber. Along the way ignore barriers and fences, which
  // MemorySSA treats as clobbers even though they do not write memory.
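  //
  // A sketch of the common case (illustrative IR, names made up):
  //   entry:
  //     call void @llvm.amdgcn.s.barrier()     ; MemoryDef, not a real clobber
  //     %v = load i32, i32 addrspace(1)* %p    ; queried load
  // The walker returns the barrier's MemoryDef as the nearest clobber;
  // isReallyAClobber rejects it, the walk resumes from its defining access,
  // reaches the live-on-entry state, and the load is reported unclobbered.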
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      if (isReallyAClobber(Def)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (DA->isUniform(&I))
    setUniformMetadata(&I);
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  // We only track memory up to the function boundary and cannot go beyond it
  // because of FunctionPass restrictions, so clobbering can only be proven for
  // loads whose memory is live into an entry point.
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);

  if (!isEntryFunc) {
    if (PtrI)
      setUniformMetadata(PtrI);
    return;
  }

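  // When the uniform pointer is a kernel argument or a global rather than an
  // instruction, there is nothing to attach metadata to, so a zero-index GEP
  // clone of the pointer is created at the function entry below and the load
  // is rewritten to use it. Illustrative IR (names made up):
  //   before:  %v = load i32, i32 addrspace(1)* %arg
  //   after:   %0 = getelementptr i32, i32 addrspace(1)* %arg, i64 0,
  //                 !amdgpu.uniform !0, !amdgpu.noclobber !0
  //            %v = load i32, i32 addrspace(1)* %0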
  bool NotClobbered = false;
  bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  if (PtrI)
    NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
  else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
    if (GlobalLoad && !isClobberedInFunction(&I)) {
      NotClobbered = true;
      // Look up an existing GEP clone of this pointer.
      if (noClobberClones.count(Ptr)) {
        PtrI = noClobberClones[Ptr];
      } else {
        // Create a zero-index GEP of the pointer and insert it at the function
        // entry so that it dominates all uses, then cache it for reuse.
        Function *F = I.getParent()->getParent();
        Value *Idx = Constant::getIntegerValue(
            Type::getInt64Ty(Ptr->getContext()), APInt(64, 0));
        GetElementPtrInst *GEP = GetElementPtrInst::Create(
            I.getType(), Ptr, ArrayRef<Value *>(Idx), Twine(""),
            F->getEntryBlock().getFirstNonPHI());
        noClobberClones[Ptr] = GEP;
        PtrI = GEP;
      }
      I.replaceUsesOfWith(Ptr, PtrI);
    }
  }

  if (PtrI) {
    setUniformMetadata(PtrI);
    if (NotClobbered)
      setNoClobberMetadata(PtrI);
  }
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());

  visit(F);
  noClobberClones.clear();
  return true;
}

FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}