//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to uniform IR values, and
/// amdgpu.noclobber metadata to global loads that are provably not clobbered
/// within the function, so this information can be used during instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

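/// Legacy-PM pass that visits every branch and load in a function and attaches
/// empty !amdgpu.uniform (and, where provable, !amdgpu.noclobber) metadata
/// nodes for the AMDGPU instruction selector to consume.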
class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemoryDependenceResults *MDR;
  LoopInfo *LI;
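  // Zero-index GEP clones created so !amdgpu.noclobber can be attached when a
  // load's pointer is an Argument or GlobalValue rather than an instruction;
  // keyed by the original pointer and reused across loads of the same pointer.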
  DenseMap<Value *, GetElementPtrInst *> noClobberClones;
  bool isEntryFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() : FunctionPass(ID) {}
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst *Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}
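// Both helpers attach an empty MDNode; its mere presence is the signal. After
// this pass, an annotated load looks like (illustrative IR):
//   %v = load i32, i32 addrspace(1)* %p, !amdgpu.uniform !0, !amdgpu.noclobber !0
// where !0 = !{}.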

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
  // 1. Get the innermost loop containing the load's block.
  // 2. If it exists, walk up to the outermost loop and add all of that loop's
  //    blocks to the checklist, since a write anywhere in the loop nest may
  //    clobber the load on a later iteration; start the predecessor DFS from
  //    the outermost loop header.
  // 3. Otherwise, start the predecessor DFS from the load's own block.
  SetVector<BasicBlock *> Checklist;
  BasicBlock *Start = Load->getParent();
  Checklist.insert(Start);
  const Value *Ptr = Load->getPointerOperand();
  const Loop *L = LI->getLoopFor(Start);
  if (L) {
    const Loop *P = L;
    do {
      L = P;
      P = P->getParentLoop();
    } while (P);
    Checklist.insert(L->block_begin(), L->block_end());
    Start = L->getHeader();
  }

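  // Add every block that can reach Start (all transitive predecessors) via an
  // inverse depth-first walk, so any write that may execute before the load
  // is inspected.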
  Checklist.insert(idf_begin(Start), idf_end(Start));
  for (auto &BB : Checklist) {
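    // Scan the entire block, except in the load's own block when it is not
    // inside a loop; there only instructions preceding the load can clobber it.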
    BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
      BasicBlock::iterator(Load) : BB->end();
    auto Q = MDR->getPointerDependencyFrom(
        MemoryLocation::getBeforeOrAfter(Ptr), true, StartIt, BB, Load);
    if (Q.isClobber() || Q.isUnknown() ||
        // Store defines the load and thus clobbers it.
        (Q.isDef() && Q.getInst()->mayWriteToMemory()))
      return true;
  }
  return false;
}

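// Uniform branches get !amdgpu.uniform so instruction selection can emit
// scalar control flow instead of divergent, per-lane control flow.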
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (DA->isUniform(&I))
    setUniformMetadata(&I);
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  auto isGlobalLoad = [&](LoadInst &Load) -> bool {
    return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  };
  // We track memory dependencies only up to the function boundary; a
  // FunctionPass cannot look beyond it. Hence we can only prove that memory
  // is not clobbered for operations that are live in to entry points.
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);

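  // In non-entry functions we can still mark a uniform address, but we cannot
  // prove the absence of clobbers, so skip the noclobber analysis entirely.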
  if (!isEntryFunc) {
    if (PtrI)
      setUniformMetadata(PtrI);
    return;
  }

  bool NotClobbered = false;
  bool GlobalLoad = isGlobalLoad(I);
  if (PtrI)
    NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
  else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
    if (GlobalLoad && !isClobberedInFunction(&I)) {
      NotClobbered = true;
      // Reuse an existing GEP clone of this pointer, if any.
      if (noClobberClones.count(Ptr)) {
        PtrI = noClobberClones[Ptr];
      } else {
        // Otherwise clone the pointer through a zero-index GEP so there is an
        // instruction to carry the metadata. Insert it at the function entry
        // so it dominates all uses.
        Function *F = I.getParent()->getParent();
        Value *Idx = Constant::getIntegerValue(
            Type::getInt64Ty(Ptr->getContext()), APInt(64, 0));
        GetElementPtrInst *GEP = GetElementPtrInst::Create(
            Ptr->getType()->getPointerElementType(), Ptr,
            ArrayRef<Value *>(Idx), Twine(""),
            F->getEntryBlock().getFirstNonPHI());
        // Cache the clone so later loads of the same pointer reuse it.
        noClobberClones[Ptr] = GEP;
        PtrI = GEP;
      }
      I.replaceUsesOfWith(Ptr, PtrI);
    }
  }

  if (PtrI) {
    setUniformMetadata(PtrI);
    if (NotClobbered)
      setNoClobberMetadata(PtrI);
  }
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA  = &getAnalysis<LegacyDivergenceAnalysis>();
  MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());

  visit(F);
  noClobberClones.clear();
  return true;
}

FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}