//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
//
//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AMDGPU.h"
165ffd83dbSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
170b57cec5SDimitry Andric #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18*5f7ddb14SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
190b57cec5SDimitry Andric #include "llvm/IR/InstVisitor.h"
20480093f4SDimitry Andric #include "llvm/InitializePasses.h"
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-uniform"
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace {
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric class AMDGPUAnnotateUniformValues : public FunctionPass,
290b57cec5SDimitry Andric                        public InstVisitor<AMDGPUAnnotateUniformValues> {
300b57cec5SDimitry Andric   LegacyDivergenceAnalysis *DA;
31*5f7ddb14SDimitry Andric   MemorySSA *MSSA;
320b57cec5SDimitry Andric   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
335ffd83dbSDimitry Andric   bool isEntryFunc;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric public:
360b57cec5SDimitry Andric   static char ID;
AMDGPUAnnotateUniformValues()370b57cec5SDimitry Andric   AMDGPUAnnotateUniformValues() :
380b57cec5SDimitry Andric     FunctionPass(ID) { }
390b57cec5SDimitry Andric   bool doInitialization(Module &M) override;
400b57cec5SDimitry Andric   bool runOnFunction(Function &F) override;
getPassName() const410b57cec5SDimitry Andric   StringRef getPassName() const override {
420b57cec5SDimitry Andric     return "AMDGPU Annotate Uniform Values";
430b57cec5SDimitry Andric   }
getAnalysisUsage(AnalysisUsage & AU) const440b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
450b57cec5SDimitry Andric     AU.addRequired<LegacyDivergenceAnalysis>();
46*5f7ddb14SDimitry Andric     AU.addRequired<MemorySSAWrapperPass>();
470b57cec5SDimitry Andric     AU.setPreservesAll();
480b57cec5SDimitry Andric  }
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   void visitBranchInst(BranchInst &I);
510b57cec5SDimitry Andric   void visitLoadInst(LoadInst &I);
520b57cec5SDimitry Andric   bool isClobberedInFunction(LoadInst * Load);
530b57cec5SDimitry Andric };
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric } // End anonymous namespace
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
580b57cec5SDimitry Andric                       "Add AMDGPU uniform metadata", false, false)
590b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
60*5f7ddb14SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
610b57cec5SDimitry Andric INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
620b57cec5SDimitry Andric                     "Add AMDGPU uniform metadata", false, false)
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric char AMDGPUAnnotateUniformValues::ID = 0;
650b57cec5SDimitry Andric 
setUniformMetadata(Instruction * I)660b57cec5SDimitry Andric static void setUniformMetadata(Instruction *I) {
670b57cec5SDimitry Andric   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
680b57cec5SDimitry Andric }
setNoClobberMetadata(Instruction * I)690b57cec5SDimitry Andric static void setNoClobberMetadata(Instruction *I) {
700b57cec5SDimitry Andric   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric 
isClobberedInFunction(LoadInst * Load)730b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
74*5f7ddb14SDimitry Andric   const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
75*5f7ddb14SDimitry Andric   return !MSSA->isLiveOnEntryDef(MA);
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric 
visitBranchInst(BranchInst & I)780b57cec5SDimitry Andric void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
790b57cec5SDimitry Andric   if (DA->isUniform(&I))
80*5f7ddb14SDimitry Andric     setUniformMetadata(&I);
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric 
visitLoadInst(LoadInst & I)830b57cec5SDimitry Andric void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
840b57cec5SDimitry Andric   Value *Ptr = I.getPointerOperand();
850b57cec5SDimitry Andric   if (!DA->isUniform(Ptr))
860b57cec5SDimitry Andric     return;
870b57cec5SDimitry Andric   auto isGlobalLoad = [&](LoadInst &Load)->bool {
880b57cec5SDimitry Andric     return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
890b57cec5SDimitry Andric   };
905ffd83dbSDimitry Andric   // We're tracking up to the Function boundaries, and cannot go beyond because
915ffd83dbSDimitry Andric   // of FunctionPass restrictions. We can ensure that is memory not clobbered
925ffd83dbSDimitry Andric   // for memory operations that are live in to entry points only.
930b57cec5SDimitry Andric   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
941106035dSDimitry Andric 
951106035dSDimitry Andric   if (!isEntryFunc) {
961106035dSDimitry Andric     if (PtrI)
971106035dSDimitry Andric       setUniformMetadata(PtrI);
981106035dSDimitry Andric     return;
991106035dSDimitry Andric   }
1001106035dSDimitry Andric 
1011106035dSDimitry Andric   bool NotClobbered = false;
102af732203SDimitry Andric   bool GlobalLoad = isGlobalLoad(I);
1031106035dSDimitry Andric   if (PtrI)
104af732203SDimitry Andric     NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
1051106035dSDimitry Andric   else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
106af732203SDimitry Andric     if (GlobalLoad && !isClobberedInFunction(&I)) {
1071106035dSDimitry Andric       NotClobbered = true;
1080b57cec5SDimitry Andric       // Lookup for the existing GEP
1090b57cec5SDimitry Andric       if (noClobberClones.count(Ptr)) {
1100b57cec5SDimitry Andric         PtrI = noClobberClones[Ptr];
1110b57cec5SDimitry Andric       } else {
1120b57cec5SDimitry Andric         // Create GEP of the Value
1130b57cec5SDimitry Andric         Function *F = I.getParent()->getParent();
1140b57cec5SDimitry Andric         Value *Idx = Constant::getIntegerValue(
1150b57cec5SDimitry Andric           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
1160b57cec5SDimitry Andric         // Insert GEP at the entry to make it dominate all uses
117*5f7ddb14SDimitry Andric         PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
118*5f7ddb14SDimitry Andric                                          ArrayRef<Value *>(Idx), Twine(""),
119*5f7ddb14SDimitry Andric                                          F->getEntryBlock().getFirstNonPHI());
1200b57cec5SDimitry Andric       }
1210b57cec5SDimitry Andric       I.replaceUsesOfWith(Ptr, PtrI);
1220b57cec5SDimitry Andric     }
1230b57cec5SDimitry Andric   }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   if (PtrI) {
1260b57cec5SDimitry Andric     setUniformMetadata(PtrI);
1270b57cec5SDimitry Andric     if (NotClobbered)
1280b57cec5SDimitry Andric       setNoClobberMetadata(PtrI);
1290b57cec5SDimitry Andric   }
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric 
doInitialization(Module & M)1320b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
1330b57cec5SDimitry Andric   return false;
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric 
runOnFunction(Function & F)1360b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
1370b57cec5SDimitry Andric   if (skipFunction(F))
1380b57cec5SDimitry Andric     return false;
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   DA = &getAnalysis<LegacyDivergenceAnalysis>();
141*5f7ddb14SDimitry Andric   MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1425ffd83dbSDimitry Andric   isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   visit(F);
1450b57cec5SDimitry Andric   noClobberClones.clear();
1460b57cec5SDimitry Andric   return true;
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric FunctionPass *
createAMDGPUAnnotateUniformValues()1500b57cec5SDimitry Andric llvm::createAMDGPUAnnotateUniformValues() {
1510b57cec5SDimitry Andric   return new AMDGPUAnnotateUniformValues();
1520b57cec5SDimitry Andric }
153