//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform and amdgpu.noclobber metadata to IR values
/// so this information can be used during instruction selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemorySSA *MSSA;
  AliasAnalysis *AA;
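  // Whether the current function uses an AMDGPU entry-point (kernel) calling
  // convention; set in runOnFunction.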
  bool isEntryFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() :
    FunctionPass(ID) { }
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
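  // Returns true if the memory \p Load reads may be written (clobbered) on
  // some path between the function entry and the load itself.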
  bool isClobberedInFunction(LoadInst *Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

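// Both helpers attach an empty metadata node: it is the presence of the named
// metadata, not its contents, that instruction selection checks for.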
static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}
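// For illustration, IR annotated by the two helpers above might look like this
// (hypothetical example; actual types and names depend on the input module):
//
//   %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %idx, !amdgpu.uniform !0
//   %val = load i32, i32 addrspace(1)* %ptr, !amdgpu.noclobber !0
//   !0 = !{}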

bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

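  // A MemoryDef only counts as a real clobber if it may actually write memory
  // that overlaps the loaded location; fences, barriers, and atomics known not
  // to alias the load are ignored.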
  const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
    Instruction *DefInst = Def->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');

    if (isa<FenceInst>(DefInst))
      return false;

    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::amdgcn_s_barrier:
      case Intrinsic::amdgcn_wave_barrier:
        return false;
      default:
        break;
      }
    }

    // Ignore atomics that do not alias the original load; from MemorySSA's
    // point of view any atomic is a universal MemoryDef, just like a fence,
    // even when it cannot write the loaded location.
    const auto checkNoAlias = [this, Load](auto I) -> bool {
      return I && AA->isNoAlias(I->getPointerOperand(),
                                Load->getPointerOperand());
    };

    if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
        checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
      return false;

    return true;
  };

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it is either
  // live-on-entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // MemoryPhi case add all incoming defs to the WorkList and keep walking up
  // through the definitions of this memory location until the entry state is
  // reached. If every path ends at the live-on-entry state, the load is not
  // clobbered. Along the way ignore barriers and fences, which MemorySSA
  // treats as clobbers even though they do not actually write any memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      if (isReallyAClobber(Def)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

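// Uniform branches are annotated so that instruction selection can treat them
// as uniform (e.g. emit a scalar rather than a divergent branch).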
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (DA->isUniform(&I))
    setUniformMetadata(&I);
}

void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
  if (PtrI)
    setUniformMetadata(PtrI);

  // We only track memory up to the function boundary; a FunctionPass cannot
  // look beyond it. Hence we can only prove that memory is not clobbered for
  // loads whose memory state is live-in to an entry point (kernel).
  if (!isEntryFunc)
    return;
  bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  bool NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
  if (NotClobbered)
    setNoClobberMetadata(&I);
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());

  visit(F);
  return true;
}

FunctionPass *llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}