10b57cec5SDimitry Andric //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
150b57cec5SDimitry Andric #include "AMDGPU.h"
165ffd83dbSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
170b57cec5SDimitry Andric #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18*5f7ddb14SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
190b57cec5SDimitry Andric #include "llvm/IR/InstVisitor.h"
20480093f4SDimitry Andric #include "llvm/InitializePasses.h"
210b57cec5SDimitry Andric
220b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-uniform"
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric namespace {
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric class AMDGPUAnnotateUniformValues : public FunctionPass,
290b57cec5SDimitry Andric public InstVisitor<AMDGPUAnnotateUniformValues> {
300b57cec5SDimitry Andric LegacyDivergenceAnalysis *DA;
31*5f7ddb14SDimitry Andric MemorySSA *MSSA;
320b57cec5SDimitry Andric DenseMap<Value*, GetElementPtrInst*> noClobberClones;
335ffd83dbSDimitry Andric bool isEntryFunc;
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric public:
360b57cec5SDimitry Andric static char ID;
AMDGPUAnnotateUniformValues()370b57cec5SDimitry Andric AMDGPUAnnotateUniformValues() :
380b57cec5SDimitry Andric FunctionPass(ID) { }
390b57cec5SDimitry Andric bool doInitialization(Module &M) override;
400b57cec5SDimitry Andric bool runOnFunction(Function &F) override;
getPassName() const410b57cec5SDimitry Andric StringRef getPassName() const override {
420b57cec5SDimitry Andric return "AMDGPU Annotate Uniform Values";
430b57cec5SDimitry Andric }
getAnalysisUsage(AnalysisUsage & AU) const440b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
450b57cec5SDimitry Andric AU.addRequired<LegacyDivergenceAnalysis>();
46*5f7ddb14SDimitry Andric AU.addRequired<MemorySSAWrapperPass>();
470b57cec5SDimitry Andric AU.setPreservesAll();
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric void visitBranchInst(BranchInst &I);
510b57cec5SDimitry Andric void visitLoadInst(LoadInst &I);
520b57cec5SDimitry Andric bool isClobberedInFunction(LoadInst * Load);
530b57cec5SDimitry Andric };
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric } // End anonymous namespace
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
580b57cec5SDimitry Andric "Add AMDGPU uniform metadata", false, false)
590b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
60*5f7ddb14SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
610b57cec5SDimitry Andric INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
620b57cec5SDimitry Andric "Add AMDGPU uniform metadata", false, false)
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric char AMDGPUAnnotateUniformValues::ID = 0;
650b57cec5SDimitry Andric
setUniformMetadata(Instruction * I)660b57cec5SDimitry Andric static void setUniformMetadata(Instruction *I) {
670b57cec5SDimitry Andric I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
680b57cec5SDimitry Andric }
setNoClobberMetadata(Instruction * I)690b57cec5SDimitry Andric static void setNoClobberMetadata(Instruction *I) {
700b57cec5SDimitry Andric I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric
isClobberedInFunction(LoadInst * Load)730b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
74*5f7ddb14SDimitry Andric const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
75*5f7ddb14SDimitry Andric return !MSSA->isLiveOnEntryDef(MA);
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric
visitBranchInst(BranchInst & I)780b57cec5SDimitry Andric void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
790b57cec5SDimitry Andric if (DA->isUniform(&I))
80*5f7ddb14SDimitry Andric setUniformMetadata(&I);
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric
visitLoadInst(LoadInst & I)830b57cec5SDimitry Andric void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
840b57cec5SDimitry Andric Value *Ptr = I.getPointerOperand();
850b57cec5SDimitry Andric if (!DA->isUniform(Ptr))
860b57cec5SDimitry Andric return;
870b57cec5SDimitry Andric auto isGlobalLoad = [&](LoadInst &Load)->bool {
880b57cec5SDimitry Andric return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
890b57cec5SDimitry Andric };
905ffd83dbSDimitry Andric // We're tracking up to the Function boundaries, and cannot go beyond because
915ffd83dbSDimitry Andric // of FunctionPass restrictions. We can ensure that is memory not clobbered
925ffd83dbSDimitry Andric // for memory operations that are live in to entry points only.
930b57cec5SDimitry Andric Instruction *PtrI = dyn_cast<Instruction>(Ptr);
941106035dSDimitry Andric
951106035dSDimitry Andric if (!isEntryFunc) {
961106035dSDimitry Andric if (PtrI)
971106035dSDimitry Andric setUniformMetadata(PtrI);
981106035dSDimitry Andric return;
991106035dSDimitry Andric }
1001106035dSDimitry Andric
1011106035dSDimitry Andric bool NotClobbered = false;
102af732203SDimitry Andric bool GlobalLoad = isGlobalLoad(I);
1031106035dSDimitry Andric if (PtrI)
104af732203SDimitry Andric NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
1051106035dSDimitry Andric else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
106af732203SDimitry Andric if (GlobalLoad && !isClobberedInFunction(&I)) {
1071106035dSDimitry Andric NotClobbered = true;
1080b57cec5SDimitry Andric // Lookup for the existing GEP
1090b57cec5SDimitry Andric if (noClobberClones.count(Ptr)) {
1100b57cec5SDimitry Andric PtrI = noClobberClones[Ptr];
1110b57cec5SDimitry Andric } else {
1120b57cec5SDimitry Andric // Create GEP of the Value
1130b57cec5SDimitry Andric Function *F = I.getParent()->getParent();
1140b57cec5SDimitry Andric Value *Idx = Constant::getIntegerValue(
1150b57cec5SDimitry Andric Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
1160b57cec5SDimitry Andric // Insert GEP at the entry to make it dominate all uses
117*5f7ddb14SDimitry Andric PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
118*5f7ddb14SDimitry Andric ArrayRef<Value *>(Idx), Twine(""),
119*5f7ddb14SDimitry Andric F->getEntryBlock().getFirstNonPHI());
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric I.replaceUsesOfWith(Ptr, PtrI);
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric if (PtrI) {
1260b57cec5SDimitry Andric setUniformMetadata(PtrI);
1270b57cec5SDimitry Andric if (NotClobbered)
1280b57cec5SDimitry Andric setNoClobberMetadata(PtrI);
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric
doInitialization(Module & M)1320b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
1330b57cec5SDimitry Andric return false;
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric
runOnFunction(Function & F)1360b57cec5SDimitry Andric bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
1370b57cec5SDimitry Andric if (skipFunction(F))
1380b57cec5SDimitry Andric return false;
1390b57cec5SDimitry Andric
1400b57cec5SDimitry Andric DA = &getAnalysis<LegacyDivergenceAnalysis>();
141*5f7ddb14SDimitry Andric MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1425ffd83dbSDimitry Andric isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
1430b57cec5SDimitry Andric
1440b57cec5SDimitry Andric visit(F);
1450b57cec5SDimitry Andric noClobberClones.clear();
1460b57cec5SDimitry Andric return true;
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric
1490b57cec5SDimitry Andric FunctionPass *
createAMDGPUAnnotateUniformValues()1500b57cec5SDimitry Andric llvm::createAMDGPUAnnotateUniformValues() {
1510b57cec5SDimitry Andric return new AMDGPUAnnotateUniformValues();
1520b57cec5SDimitry Andric }
153