//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes if a function is potentially memory bound and if a kernel
/// may benefit from limiting the number of waves to reduce cache thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

301c538423SStanislav Mekhanoshin using namespace llvm;
311c538423SStanislav Mekhanoshin 
321c538423SStanislav Mekhanoshin #define DEBUG_TYPE "amdgpu-perf-hint"
331c538423SStanislav Mekhanoshin 
341c538423SStanislav Mekhanoshin static cl::opt<unsigned>
351c538423SStanislav Mekhanoshin     MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
361c538423SStanislav Mekhanoshin                    cl::desc("Function mem bound threshold in %"));
371c538423SStanislav Mekhanoshin 
381c538423SStanislav Mekhanoshin static cl::opt<unsigned>
391c538423SStanislav Mekhanoshin     LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
401c538423SStanislav Mekhanoshin                     cl::desc("Kernel limit wave threshold in %"));
411c538423SStanislav Mekhanoshin 
421c538423SStanislav Mekhanoshin static cl::opt<unsigned>
431c538423SStanislav Mekhanoshin     IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
441c538423SStanislav Mekhanoshin              cl::desc("Indirect access memory instruction weight"));
451c538423SStanislav Mekhanoshin 
461c538423SStanislav Mekhanoshin static cl::opt<unsigned>
471c538423SStanislav Mekhanoshin     LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
481c538423SStanislav Mekhanoshin              cl::desc("Large stride memory access weight"));
491c538423SStanislav Mekhanoshin 
501c538423SStanislav Mekhanoshin static cl::opt<unsigned>
511c538423SStanislav Mekhanoshin     LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
521c538423SStanislav Mekhanoshin                       cl::desc("Large stride memory access threshold"));
531c538423SStanislav Mekhanoshin 
541c538423SStanislav Mekhanoshin STATISTIC(NumMemBound, "Number of functions marked as memory bound");
551c538423SStanislav Mekhanoshin STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");
561c538423SStanislav Mekhanoshin 
571c538423SStanislav Mekhanoshin char llvm::AMDGPUPerfHintAnalysis::ID = 0;
581c538423SStanislav Mekhanoshin char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;
591c538423SStanislav Mekhanoshin 
601c538423SStanislav Mekhanoshin INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
611c538423SStanislav Mekhanoshin                 "Analysis if a function is memory bound", true, true)
621c538423SStanislav Mekhanoshin 
namespace {

651c538423SStanislav Mekhanoshin struct AMDGPUPerfHint {
661c538423SStanislav Mekhanoshin   friend AMDGPUPerfHintAnalysis;
671c538423SStanislav Mekhanoshin 
681c538423SStanislav Mekhanoshin public:
AMDGPUPerfHint__anon2ff3a9050111::AMDGPUPerfHint691c538423SStanislav Mekhanoshin   AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
701c538423SStanislav Mekhanoshin                  const TargetLowering *TLI_)
711c538423SStanislav Mekhanoshin       : FIM(FIM_), DL(nullptr), TLI(TLI_) {}
721c538423SStanislav Mekhanoshin 
73e7e23e3eSMatt Arsenault   bool runOnFunction(Function &F);
741c538423SStanislav Mekhanoshin 
751c538423SStanislav Mekhanoshin private:
761c538423SStanislav Mekhanoshin   struct MemAccessInfo {
77deac0ac5SKazu Hirata     const Value *V = nullptr;
78deac0ac5SKazu Hirata     const Value *Base = nullptr;
79deac0ac5SKazu Hirata     int64_t Offset = 0;
80deac0ac5SKazu Hirata     MemAccessInfo() = default;
811c538423SStanislav Mekhanoshin     bool isLargeStride(MemAccessInfo &Reference) const;
821c538423SStanislav Mekhanoshin #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print__anon2ff3a9050111::AMDGPUPerfHint::MemAccessInfo831c538423SStanislav Mekhanoshin     Printable print() const {
841c538423SStanislav Mekhanoshin       return Printable([this](raw_ostream &OS) {
851c538423SStanislav Mekhanoshin         OS << "Value: " << *V << '\n'
861c538423SStanislav Mekhanoshin            << "Base: " << *Base << " Offset: " << Offset << '\n';
871c538423SStanislav Mekhanoshin       });
881c538423SStanislav Mekhanoshin     }
891c538423SStanislav Mekhanoshin #endif
901c538423SStanislav Mekhanoshin   };
911c538423SStanislav Mekhanoshin 
921c538423SStanislav Mekhanoshin   MemAccessInfo makeMemAccessInfo(Instruction *) const;
931c538423SStanislav Mekhanoshin 
941c538423SStanislav Mekhanoshin   MemAccessInfo LastAccess; // Last memory access info
951c538423SStanislav Mekhanoshin 
961c538423SStanislav Mekhanoshin   AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;
971c538423SStanislav Mekhanoshin 
981c538423SStanislav Mekhanoshin   const DataLayout *DL;
991c538423SStanislav Mekhanoshin 
1001c538423SStanislav Mekhanoshin   const TargetLowering *TLI;
1011c538423SStanislav Mekhanoshin 
102e7e23e3eSMatt Arsenault   AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
1031c538423SStanislav Mekhanoshin   static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
1041c538423SStanislav Mekhanoshin   static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
1051c538423SStanislav Mekhanoshin 
1061c538423SStanislav Mekhanoshin   bool isIndirectAccess(const Instruction *Inst) const;
1071c538423SStanislav Mekhanoshin 
1081c538423SStanislav Mekhanoshin   /// Check if the instruction is large stride.
1091c538423SStanislav Mekhanoshin   /// The purpose is to identify memory access pattern like:
1101c538423SStanislav Mekhanoshin   /// x = a[i];
1111c538423SStanislav Mekhanoshin   /// y = a[i+1000];
1121c538423SStanislav Mekhanoshin   /// z = a[i+2000];
1131c538423SStanislav Mekhanoshin   /// In the above example, the second and third memory access will be marked
1141c538423SStanislav Mekhanoshin   /// large stride memory access.
1151c538423SStanislav Mekhanoshin   bool isLargeStride(const Instruction *Inst);
1161c538423SStanislav Mekhanoshin 
1171c538423SStanislav Mekhanoshin   bool isGlobalAddr(const Value *V) const;
1181c538423SStanislav Mekhanoshin   bool isLocalAddr(const Value *V) const;
119*9fa425c1SAbinav Puthan Purayil   bool isGlobalLoadUsedInBB(const Instruction &) const;
1201c538423SStanislav Mekhanoshin };
1211c538423SStanislav Mekhanoshin 
getMemoryInstrPtrAndType(const Instruction * Inst)122220815a9SNikita Popov static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(
123220815a9SNikita Popov     const Instruction *Inst) {
124220815a9SNikita Popov   if (auto LI = dyn_cast<LoadInst>(Inst))
125220815a9SNikita Popov     return {LI->getPointerOperand(), LI->getType()};
126220815a9SNikita Popov   if (auto SI = dyn_cast<StoreInst>(Inst))
127220815a9SNikita Popov     return {SI->getPointerOperand(), SI->getValueOperand()->getType()};
128220815a9SNikita Popov   if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst))
129220815a9SNikita Popov     return {AI->getPointerOperand(), AI->getCompareOperand()->getType()};
130220815a9SNikita Popov   if (auto AI = dyn_cast<AtomicRMWInst>(Inst))
131220815a9SNikita Popov     return {AI->getPointerOperand(), AI->getValOperand()->getType()};
132220815a9SNikita Popov   if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst))
133220815a9SNikita Popov     return {MI->getRawDest(), Type::getInt8Ty(MI->getContext())};
1341c538423SStanislav Mekhanoshin 
135220815a9SNikita Popov   return {nullptr, nullptr};
1361c538423SStanislav Mekhanoshin }
1371c538423SStanislav Mekhanoshin 
isIndirectAccess(const Instruction * Inst) const1381c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
1391c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
1401c538423SStanislav Mekhanoshin   SmallSet<const Value *, 32> WorkSet;
1411c538423SStanislav Mekhanoshin   SmallSet<const Value *, 32> Visited;
142220815a9SNikita Popov   if (const Value *MO = getMemoryInstrPtrAndType(Inst).first) {
1431c538423SStanislav Mekhanoshin     if (isGlobalAddr(MO))
1441c538423SStanislav Mekhanoshin       WorkSet.insert(MO);
1451c538423SStanislav Mekhanoshin   }
1461c538423SStanislav Mekhanoshin 
1471c538423SStanislav Mekhanoshin   while (!WorkSet.empty()) {
1481c538423SStanislav Mekhanoshin     const Value *V = *WorkSet.begin();
1491c538423SStanislav Mekhanoshin     WorkSet.erase(*WorkSet.begin());
1501c538423SStanislav Mekhanoshin     if (!Visited.insert(V).second)
1511c538423SStanislav Mekhanoshin       continue;
1521c538423SStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');
1531c538423SStanislav Mekhanoshin 
1541c538423SStanislav Mekhanoshin     if (auto LD = dyn_cast<LoadInst>(V)) {
1551c538423SStanislav Mekhanoshin       auto M = LD->getPointerOperand();
156c246b7bdSJay Foad       if (isGlobalAddr(M)) {
1571c538423SStanislav Mekhanoshin         LLVM_DEBUG(dbgs() << "    is IA\n");
1581c538423SStanislav Mekhanoshin         return true;
1591c538423SStanislav Mekhanoshin       }
1601c538423SStanislav Mekhanoshin       continue;
1611c538423SStanislav Mekhanoshin     }
1621c538423SStanislav Mekhanoshin 
1631c538423SStanislav Mekhanoshin     if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
1641c538423SStanislav Mekhanoshin       auto P = GEP->getPointerOperand();
1651c538423SStanislav Mekhanoshin       WorkSet.insert(P);
1661c538423SStanislav Mekhanoshin       for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
1671c538423SStanislav Mekhanoshin         WorkSet.insert(GEP->getOperand(I));
1681c538423SStanislav Mekhanoshin       continue;
1691c538423SStanislav Mekhanoshin     }
1701c538423SStanislav Mekhanoshin 
1711c538423SStanislav Mekhanoshin     if (auto U = dyn_cast<UnaryInstruction>(V)) {
1721c538423SStanislav Mekhanoshin       WorkSet.insert(U->getOperand(0));
1731c538423SStanislav Mekhanoshin       continue;
1741c538423SStanislav Mekhanoshin     }
1751c538423SStanislav Mekhanoshin 
1761c538423SStanislav Mekhanoshin     if (auto BO = dyn_cast<BinaryOperator>(V)) {
1771c538423SStanislav Mekhanoshin       WorkSet.insert(BO->getOperand(0));
1781c538423SStanislav Mekhanoshin       WorkSet.insert(BO->getOperand(1));
1791c538423SStanislav Mekhanoshin       continue;
1801c538423SStanislav Mekhanoshin     }
1811c538423SStanislav Mekhanoshin 
1821c538423SStanislav Mekhanoshin     if (auto S = dyn_cast<SelectInst>(V)) {
1831c538423SStanislav Mekhanoshin       WorkSet.insert(S->getFalseValue());
1841c538423SStanislav Mekhanoshin       WorkSet.insert(S->getTrueValue());
1851c538423SStanislav Mekhanoshin       continue;
1861c538423SStanislav Mekhanoshin     }
1871c538423SStanislav Mekhanoshin 
1881c538423SStanislav Mekhanoshin     if (auto E = dyn_cast<ExtractElementInst>(V)) {
1891c538423SStanislav Mekhanoshin       WorkSet.insert(E->getVectorOperand());
1901c538423SStanislav Mekhanoshin       continue;
1911c538423SStanislav Mekhanoshin     }
1921c538423SStanislav Mekhanoshin 
1931c538423SStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << "    dropped\n");
1941c538423SStanislav Mekhanoshin   }
1951c538423SStanislav Mekhanoshin 
1961c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "  is not IA\n");
1971c538423SStanislav Mekhanoshin   return false;
1981c538423SStanislav Mekhanoshin }
1991c538423SStanislav Mekhanoshin 
200*9fa425c1SAbinav Puthan Purayil // Returns true if the global load `I` is used in its own basic block.
isGlobalLoadUsedInBB(const Instruction & I) const201*9fa425c1SAbinav Puthan Purayil bool AMDGPUPerfHint::isGlobalLoadUsedInBB(const Instruction &I) const {
202*9fa425c1SAbinav Puthan Purayil   const auto *Ld = dyn_cast<LoadInst>(&I);
203*9fa425c1SAbinav Puthan Purayil   if (!Ld)
204*9fa425c1SAbinav Puthan Purayil     return false;
205*9fa425c1SAbinav Puthan Purayil   if (!isGlobalAddr(Ld->getPointerOperand()))
206*9fa425c1SAbinav Puthan Purayil     return false;
207*9fa425c1SAbinav Puthan Purayil 
208*9fa425c1SAbinav Puthan Purayil   for (const User *Usr : Ld->users()) {
209*9fa425c1SAbinav Puthan Purayil     if (const Instruction *UsrInst = dyn_cast<Instruction>(Usr)) {
210*9fa425c1SAbinav Puthan Purayil       if (UsrInst->getParent() == I.getParent())
211*9fa425c1SAbinav Puthan Purayil         return true;
212*9fa425c1SAbinav Puthan Purayil     }
213*9fa425c1SAbinav Puthan Purayil   }
214*9fa425c1SAbinav Puthan Purayil 
215*9fa425c1SAbinav Puthan Purayil   return false;
216*9fa425c1SAbinav Puthan Purayil }
217*9fa425c1SAbinav Puthan Purayil 
visit(const Function & F)218e7e23e3eSMatt Arsenault AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
219e7e23e3eSMatt Arsenault   AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
2201c538423SStanislav Mekhanoshin 
2211c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
2221c538423SStanislav Mekhanoshin 
2231c538423SStanislav Mekhanoshin   for (auto &B : F) {
2241c538423SStanislav Mekhanoshin     LastAccess = MemAccessInfo();
225*9fa425c1SAbinav Puthan Purayil     unsigned UsedGlobalLoadsInBB = 0;
2261c538423SStanislav Mekhanoshin     for (auto &I : B) {
227220815a9SNikita Popov       if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) {
228220815a9SNikita Popov         unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32);
229*9fa425c1SAbinav Puthan Purayil         // TODO: Check if the global load and its user are close to each other
230*9fa425c1SAbinav Puthan Purayil         // instead (Or do this analysis in GCNSchedStrategy?).
231*9fa425c1SAbinav Puthan Purayil         if (isGlobalLoadUsedInBB(I))
232*9fa425c1SAbinav Puthan Purayil           UsedGlobalLoadsInBB += Size;
2331c538423SStanislav Mekhanoshin         if (isIndirectAccess(&I))
234a397c1c8SStanislav Mekhanoshin           FI.IAMInstCost += Size;
2351c538423SStanislav Mekhanoshin         if (isLargeStride(&I))
236a397c1c8SStanislav Mekhanoshin           FI.LSMInstCost += Size;
237a397c1c8SStanislav Mekhanoshin         FI.MemInstCost += Size;
238a397c1c8SStanislav Mekhanoshin         FI.InstCost += Size;
2391c538423SStanislav Mekhanoshin         continue;
2401c538423SStanislav Mekhanoshin       }
241447e2c30SMircea Trofin       if (auto *CB = dyn_cast<CallBase>(&I)) {
242447e2c30SMircea Trofin         Function *Callee = CB->getCalledFunction();
2431c538423SStanislav Mekhanoshin         if (!Callee || Callee->isDeclaration()) {
244a397c1c8SStanislav Mekhanoshin           ++FI.InstCost;
2451c538423SStanislav Mekhanoshin           continue;
2461c538423SStanislav Mekhanoshin         }
2471c538423SStanislav Mekhanoshin         if (&F == Callee) // Handle immediate recursion
2481c538423SStanislav Mekhanoshin           continue;
2491c538423SStanislav Mekhanoshin 
2507fc1cee0SStanislav Mekhanoshin         auto Loc = FIM.find(Callee);
2515e9610a3SMatt Arsenault         if (Loc == FIM.end())
2525e9610a3SMatt Arsenault           continue;
2531c538423SStanislav Mekhanoshin 
254a397c1c8SStanislav Mekhanoshin         FI.MemInstCost += Loc->second.MemInstCost;
255a397c1c8SStanislav Mekhanoshin         FI.InstCost += Loc->second.InstCost;
256a397c1c8SStanislav Mekhanoshin         FI.IAMInstCost += Loc->second.IAMInstCost;
257a397c1c8SStanislav Mekhanoshin         FI.LSMInstCost += Loc->second.LSMInstCost;
2581c538423SStanislav Mekhanoshin       } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
2591c538423SStanislav Mekhanoshin         TargetLoweringBase::AddrMode AM;
2601c538423SStanislav Mekhanoshin         auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
2611c538423SStanislav Mekhanoshin         AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
2621c538423SStanislav Mekhanoshin         AM.HasBaseReg = !AM.BaseGV;
2631c538423SStanislav Mekhanoshin         if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
2641c538423SStanislav Mekhanoshin                                        GEP->getPointerAddressSpace()))
2651c538423SStanislav Mekhanoshin           // Offset will likely be folded into load or store
2661c538423SStanislav Mekhanoshin           continue;
267a397c1c8SStanislav Mekhanoshin         ++FI.InstCost;
2681c538423SStanislav Mekhanoshin       } else {
269a397c1c8SStanislav Mekhanoshin         ++FI.InstCost;
2701c538423SStanislav Mekhanoshin       }
2711c538423SStanislav Mekhanoshin     }
272*9fa425c1SAbinav Puthan Purayil 
273*9fa425c1SAbinav Puthan Purayil     if (!FI.HasDenseGlobalMemAcc) {
274*9fa425c1SAbinav Puthan Purayil       unsigned GlobalMemAccPercentage = UsedGlobalLoadsInBB * 100 / B.size();
275*9fa425c1SAbinav Puthan Purayil       if (GlobalMemAccPercentage > 50) {
276*9fa425c1SAbinav Puthan Purayil         LLVM_DEBUG(dbgs() << "[HasDenseGlobalMemAcc] Set to true since "
277*9fa425c1SAbinav Puthan Purayil                           << B.getName() << " has " << GlobalMemAccPercentage
278*9fa425c1SAbinav Puthan Purayil                           << "% global memory access\n");
279*9fa425c1SAbinav Puthan Purayil         FI.HasDenseGlobalMemAcc = true;
280*9fa425c1SAbinav Puthan Purayil       }
281*9fa425c1SAbinav Puthan Purayil     }
2821c538423SStanislav Mekhanoshin   }
283e7e23e3eSMatt Arsenault 
284e7e23e3eSMatt Arsenault   return &FI;
2851c538423SStanislav Mekhanoshin }
2861c538423SStanislav Mekhanoshin 
runOnFunction(Function & F)287e7e23e3eSMatt Arsenault bool AMDGPUPerfHint::runOnFunction(Function &F) {
2881c538423SStanislav Mekhanoshin   const Module &M = *F.getParent();
2891c538423SStanislav Mekhanoshin   DL = &M.getDataLayout();
2901c538423SStanislav Mekhanoshin 
291e7e23e3eSMatt Arsenault   if (F.hasFnAttribute("amdgpu-wave-limiter") &&
292e7e23e3eSMatt Arsenault       F.hasFnAttribute("amdgpu-memory-bound"))
293e7e23e3eSMatt Arsenault     return false;
2941c538423SStanislav Mekhanoshin 
295e7e23e3eSMatt Arsenault   const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
296e7e23e3eSMatt Arsenault 
297a397c1c8SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << F.getName() << " MemInst cost: " << Info->MemInstCost
2981c538423SStanislav Mekhanoshin                     << '\n'
299a397c1c8SStanislav Mekhanoshin                     << " IAMInst cost: " << Info->IAMInstCost << '\n'
300a397c1c8SStanislav Mekhanoshin                     << " LSMInst cost: " << Info->LSMInstCost << '\n'
301a397c1c8SStanislav Mekhanoshin                     << " TotalInst cost: " << Info->InstCost << '\n');
3021c538423SStanislav Mekhanoshin 
3031822a5ecSJay Foad   bool Changed = false;
3041822a5ecSJay Foad 
305e7e23e3eSMatt Arsenault   if (isMemBound(*Info)) {
3061c538423SStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
3071c538423SStanislav Mekhanoshin     NumMemBound++;
308e7e23e3eSMatt Arsenault     F.addFnAttr("amdgpu-memory-bound", "true");
3091822a5ecSJay Foad     Changed = true;
3101c538423SStanislav Mekhanoshin   }
3111c538423SStanislav Mekhanoshin 
312e7e23e3eSMatt Arsenault   if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
3131c538423SStanislav Mekhanoshin     LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
3141c538423SStanislav Mekhanoshin     NumLimitWave++;
315e7e23e3eSMatt Arsenault     F.addFnAttr("amdgpu-wave-limiter", "true");
3161822a5ecSJay Foad     Changed = true;
3171c538423SStanislav Mekhanoshin   }
318e7e23e3eSMatt Arsenault 
3191822a5ecSJay Foad   return Changed;
3201c538423SStanislav Mekhanoshin }
3211c538423SStanislav Mekhanoshin 
isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo & FI)3221c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
323*9fa425c1SAbinav Puthan Purayil   // Reverting optimal scheduling in favour of occupancy with basic block(s)
324*9fa425c1SAbinav Puthan Purayil   // having dense global memory access can potentially hurt performance.
325*9fa425c1SAbinav Puthan Purayil   if (FI.HasDenseGlobalMemAcc)
326*9fa425c1SAbinav Puthan Purayil     return true;
327*9fa425c1SAbinav Puthan Purayil 
328a397c1c8SStanislav Mekhanoshin   return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh;
3291c538423SStanislav Mekhanoshin }
3301c538423SStanislav Mekhanoshin 
needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo & FI)3311c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
332a397c1c8SStanislav Mekhanoshin   return ((FI.MemInstCost + FI.IAMInstCost * IAWeight +
333a397c1c8SStanislav Mekhanoshin            FI.LSMInstCost * LSWeight) * 100 / FI.InstCost) > LimitWaveThresh;
3341c538423SStanislav Mekhanoshin }
3351c538423SStanislav Mekhanoshin 
isGlobalAddr(const Value * V) const3361c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
3371c538423SStanislav Mekhanoshin   if (auto PT = dyn_cast<PointerType>(V->getType())) {
3381c538423SStanislav Mekhanoshin     unsigned As = PT->getAddressSpace();
3391c538423SStanislav Mekhanoshin     // Flat likely points to global too.
3400da6350dSMatt Arsenault     return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
3411c538423SStanislav Mekhanoshin   }
3421c538423SStanislav Mekhanoshin   return false;
3431c538423SStanislav Mekhanoshin }
3441c538423SStanislav Mekhanoshin 
isLocalAddr(const Value * V) const3451c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
3461c538423SStanislav Mekhanoshin   if (auto PT = dyn_cast<PointerType>(V->getType()))
3470da6350dSMatt Arsenault     return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
3481c538423SStanislav Mekhanoshin   return false;
3491c538423SStanislav Mekhanoshin }
3501c538423SStanislav Mekhanoshin 
isLargeStride(const Instruction * Inst)3511c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
3521c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');
3531c538423SStanislav Mekhanoshin 
3541c538423SStanislav Mekhanoshin   MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
3551c538423SStanislav Mekhanoshin   bool IsLargeStride = MAI.isLargeStride(LastAccess);
3561c538423SStanislav Mekhanoshin   if (MAI.Base)
3571c538423SStanislav Mekhanoshin     LastAccess = std::move(MAI);
3581c538423SStanislav Mekhanoshin 
3591c538423SStanislav Mekhanoshin   return IsLargeStride;
3601c538423SStanislav Mekhanoshin }
3611c538423SStanislav Mekhanoshin 
3621c538423SStanislav Mekhanoshin AMDGPUPerfHint::MemAccessInfo
makeMemAccessInfo(Instruction * Inst) const3631c538423SStanislav Mekhanoshin AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
3641c538423SStanislav Mekhanoshin   MemAccessInfo MAI;
365220815a9SNikita Popov   const Value *MO = getMemoryInstrPtrAndType(Inst).first;
3661c538423SStanislav Mekhanoshin 
3671c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
3681c538423SStanislav Mekhanoshin   // Do not treat local-addr memory access as large stride.
3691c538423SStanislav Mekhanoshin   if (isLocalAddr(MO))
3701c538423SStanislav Mekhanoshin     return MAI;
3711c538423SStanislav Mekhanoshin 
3721c538423SStanislav Mekhanoshin   MAI.V = MO;
3731c538423SStanislav Mekhanoshin   MAI.Base = GetPointerBaseWithConstantOffset(MO, MAI.Offset, *DL);
3741c538423SStanislav Mekhanoshin   return MAI;
3751c538423SStanislav Mekhanoshin }
3761c538423SStanislav Mekhanoshin 
isLargeStride(MemAccessInfo & Reference) const3771c538423SStanislav Mekhanoshin bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
3781c538423SStanislav Mekhanoshin     MemAccessInfo &Reference) const {
3791c538423SStanislav Mekhanoshin 
3801c538423SStanislav Mekhanoshin   if (!Base || !Reference.Base || Base != Reference.Base)
3811c538423SStanislav Mekhanoshin     return false;
3821c538423SStanislav Mekhanoshin 
3831c538423SStanislav Mekhanoshin   uint64_t Diff = Offset > Reference.Offset ? Offset - Reference.Offset
3841c538423SStanislav Mekhanoshin                                             : Reference.Offset - Offset;
3851c538423SStanislav Mekhanoshin   bool Result = Diff > LargeStrideThresh;
3861c538423SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
3871c538423SStanislav Mekhanoshin                << print() << "<=>\n"
3881c538423SStanislav Mekhanoshin                << Reference.print() << "Result:" << Result << '\n');
3891c538423SStanislav Mekhanoshin   return Result;
3901c538423SStanislav Mekhanoshin }
} // namespace

runOnSCC(CallGraphSCC & SCC)393e7e23e3eSMatt Arsenault bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
3941c538423SStanislav Mekhanoshin   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
3951c538423SStanislav Mekhanoshin   if (!TPC)
3961c538423SStanislav Mekhanoshin     return false;
3971c538423SStanislav Mekhanoshin 
3981c538423SStanislav Mekhanoshin   const TargetMachine &TM = TPC->getTM<TargetMachine>();
3991c538423SStanislav Mekhanoshin 
400e7e23e3eSMatt Arsenault   bool Changed = false;
401e7e23e3eSMatt Arsenault   for (CallGraphNode *I : SCC) {
402e7e23e3eSMatt Arsenault     Function *F = I->getFunction();
403e7e23e3eSMatt Arsenault     if (!F || F->isDeclaration())
404e7e23e3eSMatt Arsenault       continue;
405e7e23e3eSMatt Arsenault 
406e7e23e3eSMatt Arsenault     const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
4071c538423SStanislav Mekhanoshin     AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
408e7e23e3eSMatt Arsenault 
409e7e23e3eSMatt Arsenault     if (Analyzer.runOnFunction(*F))
410e7e23e3eSMatt Arsenault       Changed = true;
411e7e23e3eSMatt Arsenault   }
412e7e23e3eSMatt Arsenault 
413e7e23e3eSMatt Arsenault   return Changed;
4141c538423SStanislav Mekhanoshin }
4151c538423SStanislav Mekhanoshin 
isMemoryBound(const Function * F) const4161c538423SStanislav Mekhanoshin bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
4171c538423SStanislav Mekhanoshin   auto FI = FIM.find(F);
4181c538423SStanislav Mekhanoshin   if (FI == FIM.end())
4191c538423SStanislav Mekhanoshin     return false;
4201c538423SStanislav Mekhanoshin 
4211c538423SStanislav Mekhanoshin   return AMDGPUPerfHint::isMemBound(FI->second);
4221c538423SStanislav Mekhanoshin }
4231c538423SStanislav Mekhanoshin 
needsWaveLimiter(const Function * F) const4241c538423SStanislav Mekhanoshin bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
4251c538423SStanislav Mekhanoshin   auto FI = FIM.find(F);
4261c538423SStanislav Mekhanoshin   if (FI == FIM.end())
4271c538423SStanislav Mekhanoshin     return false;
4281c538423SStanislav Mekhanoshin 
4291c538423SStanislav Mekhanoshin   return AMDGPUPerfHint::needLimitWave(FI->second);
4301c538423SStanislav Mekhanoshin }
431