196709823SKuter Dinel //===- AMDGPUAttributor.cpp -----------------------------------------------===//
296709823SKuter Dinel //
396709823SKuter Dinel // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
496709823SKuter Dinel // See https://llvm.org/LICENSE.txt for license information.
596709823SKuter Dinel // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
696709823SKuter Dinel //
796709823SKuter Dinel //===----------------------------------------------------------------------===//
896709823SKuter Dinel //
996709823SKuter Dinel /// \file This pass uses Attributor framework to deduce AMDGPU attributes.
1096709823SKuter Dinel //
1196709823SKuter Dinel //===----------------------------------------------------------------------===//
1296709823SKuter Dinel 
1396709823SKuter Dinel #include "AMDGPU.h"
1496709823SKuter Dinel #include "GCNSubtarget.h"
15d8f99bb6SSameer Sahasrabuddhe #include "Utils/AMDGPUBaseInfo.h"
1696709823SKuter Dinel #include "llvm/CodeGen/TargetPassConfig.h"
1796709823SKuter Dinel #include "llvm/IR/IntrinsicsAMDGPU.h"
1896709823SKuter Dinel #include "llvm/IR/IntrinsicsR600.h"
1996709823SKuter Dinel #include "llvm/Target/TargetMachine.h"
2096709823SKuter Dinel #include "llvm/Transforms/IPO/Attributor.h"
2196709823SKuter Dinel 
2296709823SKuter Dinel #define DEBUG_TYPE "amdgpu-attributor"
2396709823SKuter Dinel 
2496709823SKuter Dinel using namespace llvm;
2596709823SKuter Dinel 
2602a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
27088cc636SMatt Arsenault 
2802a2e46fSSameer Sahasrabuddhe enum ImplicitArgumentPositions {
2902a2e46fSSameer Sahasrabuddhe   #include "AMDGPUAttributes.def"
3002a2e46fSSameer Sahasrabuddhe   LAST_ARG_POS
31088cc636SMatt Arsenault };
32088cc636SMatt Arsenault 
3302a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3402a2e46fSSameer Sahasrabuddhe 
3502a2e46fSSameer Sahasrabuddhe enum ImplicitArgumentMask {
3602a2e46fSSameer Sahasrabuddhe   NOT_IMPLICIT_INPUT = 0,
3702a2e46fSSameer Sahasrabuddhe   #include "AMDGPUAttributes.def"
3802a2e46fSSameer Sahasrabuddhe   ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
3902a2e46fSSameer Sahasrabuddhe };
4002a2e46fSSameer Sahasrabuddhe 
4102a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
42088cc636SMatt Arsenault static constexpr std::pair<ImplicitArgumentMask,
43088cc636SMatt Arsenault                            StringLiteral> ImplicitAttrs[] = {
4402a2e46fSSameer Sahasrabuddhe  #include "AMDGPUAttributes.def"
45088cc636SMatt Arsenault };
4696709823SKuter Dinel 
4796709823SKuter Dinel // We do not need to note the x workitem or workgroup id because they are always
4896709823SKuter Dinel // initialized.
4996709823SKuter Dinel //
5096709823SKuter Dinel // TODO: We should not add the attributes if the known compile time workgroup
5196709823SKuter Dinel // size is 1 for y/z.
52088cc636SMatt Arsenault static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID,bool & NonKernelOnly,bool & NeedsImplicit,bool HasApertureRegs,bool SupportsGetDoorBellID)530f20a35bSChangpeng Fang intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
540f20a35bSChangpeng Fang                     bool HasApertureRegs, bool SupportsGetDoorBellID) {
550f20a35bSChangpeng Fang   unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
5696709823SKuter Dinel   switch (ID) {
5796709823SKuter Dinel   case Intrinsic::amdgcn_workitem_id_x:
5896709823SKuter Dinel     NonKernelOnly = true;
59088cc636SMatt Arsenault     return WORKITEM_ID_X;
6096709823SKuter Dinel   case Intrinsic::amdgcn_workgroup_id_x:
6196709823SKuter Dinel     NonKernelOnly = true;
62088cc636SMatt Arsenault     return WORKGROUP_ID_X;
6396709823SKuter Dinel   case Intrinsic::amdgcn_workitem_id_y:
6496709823SKuter Dinel   case Intrinsic::r600_read_tidig_y:
65088cc636SMatt Arsenault     return WORKITEM_ID_Y;
6696709823SKuter Dinel   case Intrinsic::amdgcn_workitem_id_z:
6796709823SKuter Dinel   case Intrinsic::r600_read_tidig_z:
68088cc636SMatt Arsenault     return WORKITEM_ID_Z;
6996709823SKuter Dinel   case Intrinsic::amdgcn_workgroup_id_y:
7096709823SKuter Dinel   case Intrinsic::r600_read_tgid_y:
71088cc636SMatt Arsenault     return WORKGROUP_ID_Y;
7296709823SKuter Dinel   case Intrinsic::amdgcn_workgroup_id_z:
7396709823SKuter Dinel   case Intrinsic::r600_read_tgid_z:
74088cc636SMatt Arsenault     return WORKGROUP_ID_Z;
753a205977SJon Chesterfield   case Intrinsic::amdgcn_lds_kernel_id:
763a205977SJon Chesterfield     return LDS_KERNEL_ID;
7796709823SKuter Dinel   case Intrinsic::amdgcn_dispatch_ptr:
78088cc636SMatt Arsenault     return DISPATCH_PTR;
7996709823SKuter Dinel   case Intrinsic::amdgcn_dispatch_id:
80088cc636SMatt Arsenault     return DISPATCH_ID;
8196709823SKuter Dinel   case Intrinsic::amdgcn_implicitarg_ptr:
82088cc636SMatt Arsenault     return IMPLICIT_ARG_PTR;
830f20a35bSChangpeng Fang   // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
840f20a35bSChangpeng Fang   // queue_ptr.
8596709823SKuter Dinel   case Intrinsic::amdgcn_queue_ptr:
860f20a35bSChangpeng Fang     NeedsImplicit = (CodeObjectVersion == 5);
870f20a35bSChangpeng Fang     return QUEUE_PTR;
8896709823SKuter Dinel   case Intrinsic::amdgcn_is_shared:
8996709823SKuter Dinel   case Intrinsic::amdgcn_is_private:
900f20a35bSChangpeng Fang     if (HasApertureRegs)
910f20a35bSChangpeng Fang       return NOT_IMPLICIT_INPUT;
920f20a35bSChangpeng Fang     // Under V5, we need implicitarg_ptr + offsets to access private_base or
930f20a35bSChangpeng Fang     // shared_base. For pre-V5, however, need to access them through queue_ptr +
940f20a35bSChangpeng Fang     // offsets.
950f20a35bSChangpeng Fang     return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
9696709823SKuter Dinel   case Intrinsic::trap:
970f20a35bSChangpeng Fang     if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
980f20a35bSChangpeng Fang       return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
990f20a35bSChangpeng Fang     NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5.
100088cc636SMatt Arsenault     return QUEUE_PTR;
10196709823SKuter Dinel   default:
102088cc636SMatt Arsenault     return NOT_IMPLICIT_INPUT;
10396709823SKuter Dinel   }
10496709823SKuter Dinel }
10596709823SKuter Dinel 
castRequiresQueuePtr(unsigned SrcAS)10696709823SKuter Dinel static bool castRequiresQueuePtr(unsigned SrcAS) {
10796709823SKuter Dinel   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
10896709823SKuter Dinel }
10996709823SKuter Dinel 
isDSAddress(const Constant * C)11096709823SKuter Dinel static bool isDSAddress(const Constant *C) {
11196709823SKuter Dinel   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
11296709823SKuter Dinel   if (!GV)
11396709823SKuter Dinel     return false;
11496709823SKuter Dinel   unsigned AS = GV->getAddressSpace();
11596709823SKuter Dinel   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
11696709823SKuter Dinel }
11796709823SKuter Dinel 
1180eebe2e3SMatt Arsenault /// Returns true if the function requires the implicit argument be passed
1190eebe2e3SMatt Arsenault /// regardless of the function contents.
funcRequiresHostcallPtr(const Function & F)120d8f99bb6SSameer Sahasrabuddhe static bool funcRequiresHostcallPtr(const Function &F) {
1210eebe2e3SMatt Arsenault   // Sanitizers require the hostcall buffer passed in the implicit arguments.
1220eebe2e3SMatt Arsenault   return F.hasFnAttribute(Attribute::SanitizeAddress) ||
1230eebe2e3SMatt Arsenault          F.hasFnAttribute(Attribute::SanitizeThread) ||
1240eebe2e3SMatt Arsenault          F.hasFnAttribute(Attribute::SanitizeMemory) ||
1250eebe2e3SMatt Arsenault          F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1260eebe2e3SMatt Arsenault          F.hasFnAttribute(Attribute::SanitizeMemTag);
1270eebe2e3SMatt Arsenault }
1280eebe2e3SMatt Arsenault 
1299b8b1645SBenjamin Kramer namespace {
13096709823SKuter Dinel class AMDGPUInformationCache : public InformationCache {
13196709823SKuter Dinel public:
AMDGPUInformationCache(const Module & M,AnalysisGetter & AG,BumpPtrAllocator & Allocator,SetVector<Function * > * CGSCC,TargetMachine & TM)13296709823SKuter Dinel   AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
13396709823SKuter Dinel                          BumpPtrAllocator &Allocator,
13496709823SKuter Dinel                          SetVector<Function *> *CGSCC, TargetMachine &TM)
13596709823SKuter Dinel       : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
13696709823SKuter Dinel   TargetMachine &TM;
13796709823SKuter Dinel 
13896709823SKuter Dinel   enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
13996709823SKuter Dinel 
14096709823SKuter Dinel   /// Check if the subtarget has aperture regs.
hasApertureRegs(Function & F)14196709823SKuter Dinel   bool hasApertureRegs(Function &F) {
14296709823SKuter Dinel     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
14396709823SKuter Dinel     return ST.hasApertureRegs();
14496709823SKuter Dinel   }
14596709823SKuter Dinel 
1460f20a35bSChangpeng Fang   /// Check if the subtarget supports GetDoorbellID.
supportsGetDoorbellID(Function & F)1470f20a35bSChangpeng Fang   bool supportsGetDoorbellID(Function &F) {
1480f20a35bSChangpeng Fang     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1490f20a35bSChangpeng Fang     return ST.supportsGetDoorbellID();
1500f20a35bSChangpeng Fang   }
1510f20a35bSChangpeng Fang 
getFlatWorkGroupSizes(const Function & F)152ec57b375SMatt Arsenault   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
153ec57b375SMatt Arsenault     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
154ec57b375SMatt Arsenault     return ST.getFlatWorkGroupSizes(F);
155ec57b375SMatt Arsenault   }
156ec57b375SMatt Arsenault 
157ec57b375SMatt Arsenault   std::pair<unsigned, unsigned>
getMaximumFlatWorkGroupRange(const Function & F)158ec57b375SMatt Arsenault   getMaximumFlatWorkGroupRange(const Function &F) {
159ec57b375SMatt Arsenault     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
160ec57b375SMatt Arsenault     return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
161ec57b375SMatt Arsenault   }
162ec57b375SMatt Arsenault 
16396709823SKuter Dinel private:
16402a2e46fSSameer Sahasrabuddhe   /// Check if the ConstantExpr \p CE requires the queue pointer.
visitConstExpr(const ConstantExpr * CE)16596709823SKuter Dinel   static bool visitConstExpr(const ConstantExpr *CE) {
16696709823SKuter Dinel     if (CE->getOpcode() == Instruction::AddrSpaceCast) {
16796709823SKuter Dinel       unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
16896709823SKuter Dinel       return castRequiresQueuePtr(SrcAS);
16996709823SKuter Dinel     }
17096709823SKuter Dinel     return false;
17196709823SKuter Dinel   }
17296709823SKuter Dinel 
17396709823SKuter Dinel   /// Get the constant access bitmap for \p C.
getConstantAccess(const Constant * C)17496709823SKuter Dinel   uint8_t getConstantAccess(const Constant *C) {
17596709823SKuter Dinel     auto It = ConstantStatus.find(C);
17696709823SKuter Dinel     if (It != ConstantStatus.end())
17796709823SKuter Dinel       return It->second;
17896709823SKuter Dinel 
17996709823SKuter Dinel     uint8_t Result = 0;
18096709823SKuter Dinel     if (isDSAddress(C))
18196709823SKuter Dinel       Result = DS_GLOBAL;
18296709823SKuter Dinel 
18396709823SKuter Dinel     if (const auto *CE = dyn_cast<ConstantExpr>(C))
18496709823SKuter Dinel       if (visitConstExpr(CE))
18596709823SKuter Dinel         Result |= ADDR_SPACE_CAST;
18696709823SKuter Dinel 
18796709823SKuter Dinel     for (const Use &U : C->operands()) {
18896709823SKuter Dinel       const auto *OpC = dyn_cast<Constant>(U);
18996709823SKuter Dinel       if (!OpC)
19096709823SKuter Dinel         continue;
19196709823SKuter Dinel 
19296709823SKuter Dinel       Result |= getConstantAccess(OpC);
19396709823SKuter Dinel     }
19496709823SKuter Dinel     return Result;
19596709823SKuter Dinel   }
19696709823SKuter Dinel 
19796709823SKuter Dinel public:
19802a2e46fSSameer Sahasrabuddhe   /// Returns true if \p Fn needs the queue pointer because of \p C.
needsQueuePtr(const Constant * C,Function & Fn)19996709823SKuter Dinel   bool needsQueuePtr(const Constant *C, Function &Fn) {
20096709823SKuter Dinel     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
20196709823SKuter Dinel     bool HasAperture = hasApertureRegs(Fn);
20296709823SKuter Dinel 
20396709823SKuter Dinel     // No need to explore the constants.
20496709823SKuter Dinel     if (!IsNonEntryFunc && HasAperture)
20596709823SKuter Dinel       return false;
20696709823SKuter Dinel 
20796709823SKuter Dinel     uint8_t Access = getConstantAccess(C);
20896709823SKuter Dinel 
20996709823SKuter Dinel     // We need to trap on DS globals in non-entry functions.
21096709823SKuter Dinel     if (IsNonEntryFunc && (Access & DS_GLOBAL))
21196709823SKuter Dinel       return true;
21296709823SKuter Dinel 
21396709823SKuter Dinel     return !HasAperture && (Access & ADDR_SPACE_CAST);
21496709823SKuter Dinel   }
21596709823SKuter Dinel 
21696709823SKuter Dinel private:
21702a2e46fSSameer Sahasrabuddhe   /// Used to determine if the Constant needs the queue pointer.
21896709823SKuter Dinel   DenseMap<const Constant *, uint8_t> ConstantStatus;
21996709823SKuter Dinel };
22096709823SKuter Dinel 
221088cc636SMatt Arsenault struct AAAMDAttributes : public StateWrapper<
222088cc636SMatt Arsenault   BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
223088cc636SMatt Arsenault   using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
224088cc636SMatt Arsenault                             AbstractAttribute>;
225088cc636SMatt Arsenault 
AAAMDAttributes__anon4805a6110111::AAAMDAttributes22696709823SKuter Dinel   AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
22796709823SKuter Dinel 
22896709823SKuter Dinel   /// Create an abstract attribute view for the position \p IRP.
22996709823SKuter Dinel   static AAAMDAttributes &createForPosition(const IRPosition &IRP,
23096709823SKuter Dinel                                             Attributor &A);
23196709823SKuter Dinel 
23296709823SKuter Dinel   /// See AbstractAttribute::getName().
getName__anon4805a6110111::AAAMDAttributes23396709823SKuter Dinel   const std::string getName() const override { return "AAAMDAttributes"; }
23496709823SKuter Dinel 
23596709823SKuter Dinel   /// See AbstractAttribute::getIdAddr().
getIdAddr__anon4805a6110111::AAAMDAttributes23696709823SKuter Dinel   const char *getIdAddr() const override { return &ID; }
23796709823SKuter Dinel 
23896709823SKuter Dinel   /// This function should return true if the type of the \p AA is
23996709823SKuter Dinel   /// AAAMDAttributes.
classof__anon4805a6110111::AAAMDAttributes24096709823SKuter Dinel   static bool classof(const AbstractAttribute *AA) {
24196709823SKuter Dinel     return (AA->getIdAddr() == &ID);
24296709823SKuter Dinel   }
24396709823SKuter Dinel 
24496709823SKuter Dinel   /// Unique ID (due to the unique address)
24596709823SKuter Dinel   static const char ID;
24696709823SKuter Dinel };
24796709823SKuter Dinel const char AAAMDAttributes::ID = 0;
24896709823SKuter Dinel 
249f1217420SMatt Arsenault struct AAUniformWorkGroupSize
25096709823SKuter Dinel     : public StateWrapper<BooleanState, AbstractAttribute> {
25196709823SKuter Dinel   using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAUniformWorkGroupSize__anon4805a6110111::AAUniformWorkGroupSize252f1217420SMatt Arsenault   AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
25396709823SKuter Dinel 
25496709823SKuter Dinel   /// Create an abstract attribute view for the position \p IRP.
255f1217420SMatt Arsenault   static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
25696709823SKuter Dinel                                                    Attributor &A);
25796709823SKuter Dinel 
25896709823SKuter Dinel   /// See AbstractAttribute::getName().
getName__anon4805a6110111::AAUniformWorkGroupSize259f1217420SMatt Arsenault   const std::string getName() const override {
260f1217420SMatt Arsenault     return "AAUniformWorkGroupSize";
261f1217420SMatt Arsenault   }
26296709823SKuter Dinel 
26396709823SKuter Dinel   /// See AbstractAttribute::getIdAddr().
getIdAddr__anon4805a6110111::AAUniformWorkGroupSize26496709823SKuter Dinel   const char *getIdAddr() const override { return &ID; }
26596709823SKuter Dinel 
26696709823SKuter Dinel   /// This function should return true if the type of the \p AA is
26796709823SKuter Dinel   /// AAAMDAttributes.
classof__anon4805a6110111::AAUniformWorkGroupSize26896709823SKuter Dinel   static bool classof(const AbstractAttribute *AA) {
26996709823SKuter Dinel     return (AA->getIdAddr() == &ID);
27096709823SKuter Dinel   }
27196709823SKuter Dinel 
27296709823SKuter Dinel   /// Unique ID (due to the unique address)
27396709823SKuter Dinel   static const char ID;
27496709823SKuter Dinel };
275f1217420SMatt Arsenault const char AAUniformWorkGroupSize::ID = 0;
27696709823SKuter Dinel 
277f1217420SMatt Arsenault struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
AAUniformWorkGroupSizeFunction__anon4805a6110111::AAUniformWorkGroupSizeFunction278f1217420SMatt Arsenault   AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
279f1217420SMatt Arsenault       : AAUniformWorkGroupSize(IRP, A) {}
28096709823SKuter Dinel 
initialize__anon4805a6110111::AAUniformWorkGroupSizeFunction28196709823SKuter Dinel   void initialize(Attributor &A) override {
28296709823SKuter Dinel     Function *F = getAssociatedFunction();
28396709823SKuter Dinel     CallingConv::ID CC = F->getCallingConv();
28496709823SKuter Dinel 
28596709823SKuter Dinel     if (CC != CallingConv::AMDGPU_KERNEL)
28696709823SKuter Dinel       return;
28796709823SKuter Dinel 
28896709823SKuter Dinel     bool InitialValue = false;
28996709823SKuter Dinel     if (F->hasFnAttribute("uniform-work-group-size"))
29096709823SKuter Dinel       InitialValue = F->getFnAttribute("uniform-work-group-size")
29196709823SKuter Dinel                          .getValueAsString()
29296709823SKuter Dinel                          .equals("true");
29396709823SKuter Dinel 
29496709823SKuter Dinel     if (InitialValue)
29596709823SKuter Dinel       indicateOptimisticFixpoint();
29696709823SKuter Dinel     else
29796709823SKuter Dinel       indicatePessimisticFixpoint();
29896709823SKuter Dinel   }
29996709823SKuter Dinel 
updateImpl__anon4805a6110111::AAUniformWorkGroupSizeFunction30096709823SKuter Dinel   ChangeStatus updateImpl(Attributor &A) override {
30196709823SKuter Dinel     ChangeStatus Change = ChangeStatus::UNCHANGED;
30296709823SKuter Dinel 
30396709823SKuter Dinel     auto CheckCallSite = [&](AbstractCallSite CS) {
30496709823SKuter Dinel       Function *Caller = CS.getInstruction()->getFunction();
305f1217420SMatt Arsenault       LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
306edb05d55SAlexander Belyaev                         << "->" << getAssociatedFunction()->getName() << "\n");
30796709823SKuter Dinel 
308f1217420SMatt Arsenault       const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
30996709823SKuter Dinel           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
31096709823SKuter Dinel 
31196709823SKuter Dinel       Change = Change | clampStateAndIndicateChange(this->getState(),
31296709823SKuter Dinel                                                     CallerInfo.getState());
31396709823SKuter Dinel 
31496709823SKuter Dinel       return true;
31596709823SKuter Dinel     };
31696709823SKuter Dinel 
31796709823SKuter Dinel     bool AllCallSitesKnown = true;
31896709823SKuter Dinel     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
3194132dc91SMatt Arsenault       return indicatePessimisticFixpoint();
32096709823SKuter Dinel 
32196709823SKuter Dinel     return Change;
32296709823SKuter Dinel   }
32396709823SKuter Dinel 
manifest__anon4805a6110111::AAUniformWorkGroupSizeFunction32496709823SKuter Dinel   ChangeStatus manifest(Attributor &A) override {
32596709823SKuter Dinel     SmallVector<Attribute, 8> AttrList;
32696709823SKuter Dinel     LLVMContext &Ctx = getAssociatedFunction()->getContext();
32796709823SKuter Dinel 
32896709823SKuter Dinel     AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
32996709823SKuter Dinel                                       getAssumed() ? "true" : "false"));
33096709823SKuter Dinel     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
33196709823SKuter Dinel                                               /* ForceReplace */ true);
33296709823SKuter Dinel   }
33396709823SKuter Dinel 
isValidState__anon4805a6110111::AAUniformWorkGroupSizeFunction33496709823SKuter Dinel   bool isValidState() const override {
33596709823SKuter Dinel     // This state is always valid, even when the state is false.
33696709823SKuter Dinel     return true;
33796709823SKuter Dinel   }
33896709823SKuter Dinel 
getAsStr__anon4805a6110111::AAUniformWorkGroupSizeFunction33996709823SKuter Dinel   const std::string getAsStr() const override {
34096709823SKuter Dinel     return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
34196709823SKuter Dinel   }
34296709823SKuter Dinel 
34396709823SKuter Dinel   /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAUniformWorkGroupSizeFunction34496709823SKuter Dinel   void trackStatistics() const override {}
34596709823SKuter Dinel };
34696709823SKuter Dinel 
347f1217420SMatt Arsenault AAUniformWorkGroupSize &
createForPosition(const IRPosition & IRP,Attributor & A)348f1217420SMatt Arsenault AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
34996709823SKuter Dinel                                           Attributor &A) {
35096709823SKuter Dinel   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
351f1217420SMatt Arsenault     return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
352f1217420SMatt Arsenault   llvm_unreachable(
353f1217420SMatt Arsenault       "AAUniformWorkGroupSize is only valid for function position");
35496709823SKuter Dinel }
35596709823SKuter Dinel 
35696709823SKuter Dinel struct AAAMDAttributesFunction : public AAAMDAttributes {
AAAMDAttributesFunction__anon4805a6110111::AAAMDAttributesFunction35796709823SKuter Dinel   AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
35896709823SKuter Dinel       : AAAMDAttributes(IRP, A) {}
35996709823SKuter Dinel 
initialize__anon4805a6110111::AAAMDAttributesFunction36096709823SKuter Dinel   void initialize(Attributor &A) override {
36196709823SKuter Dinel     Function *F = getAssociatedFunction();
3620eebe2e3SMatt Arsenault 
3630eebe2e3SMatt Arsenault     // If the function requires the implicit arg pointer due to sanitizers,
3640eebe2e3SMatt Arsenault     // assume it's needed even if explicitly marked as not requiring it.
365d8f99bb6SSameer Sahasrabuddhe     const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
366d8f99bb6SSameer Sahasrabuddhe     if (NeedsHostcall) {
3670eebe2e3SMatt Arsenault       removeAssumedBits(IMPLICIT_ARG_PTR);
368d8f99bb6SSameer Sahasrabuddhe       removeAssumedBits(HOSTCALL_PTR);
369d8f99bb6SSameer Sahasrabuddhe     }
3700eebe2e3SMatt Arsenault 
371088cc636SMatt Arsenault     for (auto Attr : ImplicitAttrs) {
372d8f99bb6SSameer Sahasrabuddhe       if (NeedsHostcall &&
373d8f99bb6SSameer Sahasrabuddhe           (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
3740eebe2e3SMatt Arsenault         continue;
3750eebe2e3SMatt Arsenault 
376088cc636SMatt Arsenault       if (F->hasFnAttribute(Attr.second))
377088cc636SMatt Arsenault         addKnownBits(Attr.first);
378088cc636SMatt Arsenault     }
379088cc636SMatt Arsenault 
380088cc636SMatt Arsenault     if (F->isDeclaration())
381088cc636SMatt Arsenault       return;
38296709823SKuter Dinel 
38396709823SKuter Dinel     // Ignore functions with graphics calling conventions, these are currently
38496709823SKuter Dinel     // not allowed to have kernel arguments.
38596709823SKuter Dinel     if (AMDGPU::isGraphics(F->getCallingConv())) {
38696709823SKuter Dinel       indicatePessimisticFixpoint();
38796709823SKuter Dinel       return;
38896709823SKuter Dinel     }
38996709823SKuter Dinel   }
39096709823SKuter Dinel 
updateImpl__anon4805a6110111::AAAMDAttributesFunction39196709823SKuter Dinel   ChangeStatus updateImpl(Attributor &A) override {
39296709823SKuter Dinel     Function *F = getAssociatedFunction();
393088cc636SMatt Arsenault     // The current assumed state used to determine a change.
394088cc636SMatt Arsenault     auto OrigAssumed = getAssumed();
39596709823SKuter Dinel 
39696709823SKuter Dinel     // Check for Intrinsics and propagate attributes.
39796709823SKuter Dinel     const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
39896709823SKuter Dinel         *this, this->getIRPosition(), DepClassTy::REQUIRED);
399088cc636SMatt Arsenault     if (AAEdges.hasNonAsmUnknownCallee())
400088cc636SMatt Arsenault       return indicatePessimisticFixpoint();
40196709823SKuter Dinel 
402088cc636SMatt Arsenault     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
40396709823SKuter Dinel 
4040f20a35bSChangpeng Fang     bool NeedsImplicit = false;
4050f20a35bSChangpeng Fang     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
4060f20a35bSChangpeng Fang     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
4070f20a35bSChangpeng Fang     bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
408088cc636SMatt Arsenault 
40996709823SKuter Dinel     for (Function *Callee : AAEdges.getOptimisticEdges()) {
41096709823SKuter Dinel       Intrinsic::ID IID = Callee->getIntrinsicID();
411088cc636SMatt Arsenault       if (IID == Intrinsic::not_intrinsic) {
412088cc636SMatt Arsenault         const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
413088cc636SMatt Arsenault           *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
414088cc636SMatt Arsenault         *this &= AAAMD;
41596709823SKuter Dinel         continue;
41696709823SKuter Dinel       }
41796709823SKuter Dinel 
418088cc636SMatt Arsenault       bool NonKernelOnly = false;
419088cc636SMatt Arsenault       ImplicitArgumentMask AttrMask =
4200f20a35bSChangpeng Fang           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
4210f20a35bSChangpeng Fang                               HasApertureRegs, SupportsGetDoorbellID);
422088cc636SMatt Arsenault       if (AttrMask != NOT_IMPLICIT_INPUT) {
423088cc636SMatt Arsenault         if ((IsNonEntryFunc || !NonKernelOnly))
424088cc636SMatt Arsenault           removeAssumedBits(AttrMask);
425088cc636SMatt Arsenault       }
42696709823SKuter Dinel     }
42796709823SKuter Dinel 
4280f20a35bSChangpeng Fang     // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
4290f20a35bSChangpeng Fang     if (NeedsImplicit)
4300f20a35bSChangpeng Fang       removeAssumedBits(IMPLICIT_ARG_PTR);
431c6a6b579SSameer Sahasrabuddhe 
4320f20a35bSChangpeng Fang     if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
4330f20a35bSChangpeng Fang       // Under V5, we need implicitarg_ptr + offsets to access private_base or
4340f20a35bSChangpeng Fang       // shared_base. We do not actually need queue_ptr.
4350f20a35bSChangpeng Fang       if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
4360f20a35bSChangpeng Fang         removeAssumedBits(IMPLICIT_ARG_PTR);
4370f20a35bSChangpeng Fang       else
438088cc636SMatt Arsenault         removeAssumedBits(QUEUE_PTR);
43996709823SKuter Dinel     }
44096709823SKuter Dinel 
4418edaf259SChangpeng Fang     if (funcRetrievesMultigridSyncArg(A)) {
4428edaf259SChangpeng Fang       assert(!isAssumed(IMPLICIT_ARG_PTR) &&
4438edaf259SChangpeng Fang              "multigrid_sync_arg needs implicitarg_ptr");
4448edaf259SChangpeng Fang       removeAssumedBits(MULTIGRID_SYNC_ARG);
4458edaf259SChangpeng Fang     }
4468edaf259SChangpeng Fang 
447d8f99bb6SSameer Sahasrabuddhe     if (funcRetrievesHostcallPtr(A)) {
448ca62b1dbSChangpeng Fang       assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
449d8f99bb6SSameer Sahasrabuddhe       removeAssumedBits(HOSTCALL_PTR);
450d8f99bb6SSameer Sahasrabuddhe     }
451d8f99bb6SSameer Sahasrabuddhe 
452ca62b1dbSChangpeng Fang     if (funcRetrievesHeapPtr(A)) {
453ca62b1dbSChangpeng Fang       assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
454ca62b1dbSChangpeng Fang       removeAssumedBits(HEAP_PTR);
455ca62b1dbSChangpeng Fang     }
456ca62b1dbSChangpeng Fang 
4570f20a35bSChangpeng Fang     if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
4580f20a35bSChangpeng Fang       assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
4590f20a35bSChangpeng Fang       removeAssumedBits(QUEUE_PTR);
4600f20a35bSChangpeng Fang     }
4610f20a35bSChangpeng Fang 
4623a205977SJon Chesterfield     if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
4633a205977SJon Chesterfield       removeAssumedBits(LDS_KERNEL_ID);
4643a205977SJon Chesterfield     }
4653a205977SJon Chesterfield 
46602a2e46fSSameer Sahasrabuddhe     return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
46702a2e46fSSameer Sahasrabuddhe                                        : ChangeStatus::UNCHANGED;
46896709823SKuter Dinel   }
46996709823SKuter Dinel 
manifest__anon4805a6110111::AAAMDAttributesFunction47096709823SKuter Dinel   ChangeStatus manifest(Attributor &A) override {
47196709823SKuter Dinel     SmallVector<Attribute, 8> AttrList;
47296709823SKuter Dinel     LLVMContext &Ctx = getAssociatedFunction()->getContext();
47396709823SKuter Dinel 
474088cc636SMatt Arsenault     for (auto Attr : ImplicitAttrs) {
475088cc636SMatt Arsenault       if (isKnown(Attr.first))
476088cc636SMatt Arsenault         AttrList.push_back(Attribute::get(Ctx, Attr.second));
477088cc636SMatt Arsenault     }
47896709823SKuter Dinel 
47996709823SKuter Dinel     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
48096709823SKuter Dinel                                               /* ForceReplace */ true);
48196709823SKuter Dinel   }
48296709823SKuter Dinel 
getAsStr__anon4805a6110111::AAAMDAttributesFunction48396709823SKuter Dinel   const std::string getAsStr() const override {
484088cc636SMatt Arsenault     std::string Str;
485088cc636SMatt Arsenault     raw_string_ostream OS(Str);
486088cc636SMatt Arsenault     OS << "AMDInfo[";
487088cc636SMatt Arsenault     for (auto Attr : ImplicitAttrs)
488088cc636SMatt Arsenault       OS << ' ' << Attr.second;
489088cc636SMatt Arsenault     OS << " ]";
490088cc636SMatt Arsenault     return OS.str();
49196709823SKuter Dinel   }
49296709823SKuter Dinel 
49396709823SKuter Dinel   /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAAMDAttributesFunction49496709823SKuter Dinel   void trackStatistics() const override {}
49502a2e46fSSameer Sahasrabuddhe 
49602a2e46fSSameer Sahasrabuddhe private:
checkForQueuePtr__anon4805a6110111::AAAMDAttributesFunction49702a2e46fSSameer Sahasrabuddhe   bool checkForQueuePtr(Attributor &A) {
49802a2e46fSSameer Sahasrabuddhe     Function *F = getAssociatedFunction();
49902a2e46fSSameer Sahasrabuddhe     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
50002a2e46fSSameer Sahasrabuddhe 
50102a2e46fSSameer Sahasrabuddhe     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
50202a2e46fSSameer Sahasrabuddhe 
50302a2e46fSSameer Sahasrabuddhe     bool NeedsQueuePtr = false;
50402a2e46fSSameer Sahasrabuddhe 
50502a2e46fSSameer Sahasrabuddhe     auto CheckAddrSpaceCasts = [&](Instruction &I) {
50602a2e46fSSameer Sahasrabuddhe       unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
50702a2e46fSSameer Sahasrabuddhe       if (castRequiresQueuePtr(SrcAS)) {
50802a2e46fSSameer Sahasrabuddhe         NeedsQueuePtr = true;
50902a2e46fSSameer Sahasrabuddhe         return false;
51002a2e46fSSameer Sahasrabuddhe       }
51102a2e46fSSameer Sahasrabuddhe       return true;
51202a2e46fSSameer Sahasrabuddhe     };
51302a2e46fSSameer Sahasrabuddhe 
51402a2e46fSSameer Sahasrabuddhe     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
51502a2e46fSSameer Sahasrabuddhe 
51602a2e46fSSameer Sahasrabuddhe     // `checkForAllInstructions` is much more cheaper than going through all
51702a2e46fSSameer Sahasrabuddhe     // instructions, try it first.
51802a2e46fSSameer Sahasrabuddhe 
51902a2e46fSSameer Sahasrabuddhe     // The queue pointer is not needed if aperture regs is present.
52002a2e46fSSameer Sahasrabuddhe     if (!HasApertureRegs) {
52102a2e46fSSameer Sahasrabuddhe       bool UsedAssumedInformation = false;
52202a2e46fSSameer Sahasrabuddhe       A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
52302a2e46fSSameer Sahasrabuddhe                                 {Instruction::AddrSpaceCast},
52402a2e46fSSameer Sahasrabuddhe                                 UsedAssumedInformation);
52502a2e46fSSameer Sahasrabuddhe     }
52602a2e46fSSameer Sahasrabuddhe 
52702a2e46fSSameer Sahasrabuddhe     // If we found  that we need the queue pointer, nothing else to do.
52802a2e46fSSameer Sahasrabuddhe     if (NeedsQueuePtr)
52902a2e46fSSameer Sahasrabuddhe       return true;
53002a2e46fSSameer Sahasrabuddhe 
53102a2e46fSSameer Sahasrabuddhe     if (!IsNonEntryFunc && HasApertureRegs)
53202a2e46fSSameer Sahasrabuddhe       return false;
53302a2e46fSSameer Sahasrabuddhe 
53402a2e46fSSameer Sahasrabuddhe     for (BasicBlock &BB : *F) {
53502a2e46fSSameer Sahasrabuddhe       for (Instruction &I : BB) {
53602a2e46fSSameer Sahasrabuddhe         for (const Use &U : I.operands()) {
53702a2e46fSSameer Sahasrabuddhe           if (const auto *C = dyn_cast<Constant>(U)) {
53802a2e46fSSameer Sahasrabuddhe             if (InfoCache.needsQueuePtr(C, *F))
53902a2e46fSSameer Sahasrabuddhe               return true;
54002a2e46fSSameer Sahasrabuddhe           }
54102a2e46fSSameer Sahasrabuddhe         }
54202a2e46fSSameer Sahasrabuddhe       }
54302a2e46fSSameer Sahasrabuddhe     }
54402a2e46fSSameer Sahasrabuddhe 
54502a2e46fSSameer Sahasrabuddhe     return false;
54602a2e46fSSameer Sahasrabuddhe   }
547d8f99bb6SSameer Sahasrabuddhe 
funcRetrievesMultigridSyncArg__anon4805a6110111::AAAMDAttributesFunction5488edaf259SChangpeng Fang   bool funcRetrievesMultigridSyncArg(Attributor &A) {
5498edaf259SChangpeng Fang     auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
5508edaf259SChangpeng Fang     AAPointerInfo::OffsetAndSize OAS(Pos, 8);
5518edaf259SChangpeng Fang     return funcRetrievesImplicitKernelArg(A, OAS);
5528edaf259SChangpeng Fang   }
5538edaf259SChangpeng Fang 
funcRetrievesHostcallPtr__anon4805a6110111::AAAMDAttributesFunction554d8f99bb6SSameer Sahasrabuddhe   bool funcRetrievesHostcallPtr(Attributor &A) {
555d8f99bb6SSameer Sahasrabuddhe     auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
556ca62b1dbSChangpeng Fang     AAPointerInfo::OffsetAndSize OAS(Pos, 8);
557ca62b1dbSChangpeng Fang     return funcRetrievesImplicitKernelArg(A, OAS);
558ca62b1dbSChangpeng Fang   }
559d8f99bb6SSameer Sahasrabuddhe 
funcRetrievesHeapPtr__anon4805a6110111::AAAMDAttributesFunction560ca62b1dbSChangpeng Fang   bool funcRetrievesHeapPtr(Attributor &A) {
561ca62b1dbSChangpeng Fang     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
562ca62b1dbSChangpeng Fang       return false;
563dd5895ccSChangpeng Fang     AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
564ca62b1dbSChangpeng Fang     return funcRetrievesImplicitKernelArg(A, OAS);
565ca62b1dbSChangpeng Fang   }
566ca62b1dbSChangpeng Fang 
funcRetrievesQueuePtr__anon4805a6110111::AAAMDAttributesFunction5670f20a35bSChangpeng Fang   bool funcRetrievesQueuePtr(Attributor &A) {
5680f20a35bSChangpeng Fang     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
5690f20a35bSChangpeng Fang       return false;
570dd5895ccSChangpeng Fang     AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
5710f20a35bSChangpeng Fang     return funcRetrievesImplicitKernelArg(A, OAS);
5720f20a35bSChangpeng Fang   }
5730f20a35bSChangpeng Fang 
funcRetrievesImplicitKernelArg__anon4805a6110111::AAAMDAttributesFunction574ca62b1dbSChangpeng Fang   bool funcRetrievesImplicitKernelArg(Attributor &A,
575ca62b1dbSChangpeng Fang                                       AAPointerInfo::OffsetAndSize OAS) {
576d8f99bb6SSameer Sahasrabuddhe     // Check if this is a call to the implicitarg_ptr builtin and it
577d8f99bb6SSameer Sahasrabuddhe     // is used to retrieve the hostcall pointer. The implicit arg for
578d8f99bb6SSameer Sahasrabuddhe     // hostcall is not used only if every use of the implicitarg_ptr
579d8f99bb6SSameer Sahasrabuddhe     // is a load that clearly does not retrieve any byte of the
580d8f99bb6SSameer Sahasrabuddhe     // hostcall pointer. We check this by tracing all the uses of the
581d8f99bb6SSameer Sahasrabuddhe     // initial call to the implicitarg_ptr intrinsic.
582ca62b1dbSChangpeng Fang     auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
583d8f99bb6SSameer Sahasrabuddhe       auto &Call = cast<CallBase>(I);
584d8f99bb6SSameer Sahasrabuddhe       if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
585d8f99bb6SSameer Sahasrabuddhe         return true;
586d8f99bb6SSameer Sahasrabuddhe 
587d8f99bb6SSameer Sahasrabuddhe       const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
588d8f99bb6SSameer Sahasrabuddhe           *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
589d8f99bb6SSameer Sahasrabuddhe 
590d8f99bb6SSameer Sahasrabuddhe       return PointerInfoAA.forallInterferingAccesses(
591d8f99bb6SSameer Sahasrabuddhe           OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
592d8f99bb6SSameer Sahasrabuddhe             return Acc.getRemoteInst()->isDroppable();
593d8f99bb6SSameer Sahasrabuddhe           });
594d8f99bb6SSameer Sahasrabuddhe     };
595d8f99bb6SSameer Sahasrabuddhe 
596d8f99bb6SSameer Sahasrabuddhe     bool UsedAssumedInformation = false;
597ca62b1dbSChangpeng Fang     return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
598d8f99bb6SSameer Sahasrabuddhe                                               UsedAssumedInformation);
599d8f99bb6SSameer Sahasrabuddhe   }
6003a205977SJon Chesterfield 
funcRetrievesLDSKernelId__anon4805a6110111::AAAMDAttributesFunction6013a205977SJon Chesterfield   bool funcRetrievesLDSKernelId(Attributor &A) {
6023a205977SJon Chesterfield     auto DoesNotRetrieve = [&](Instruction &I) {
6033a205977SJon Chesterfield       auto &Call = cast<CallBase>(I);
6043a205977SJon Chesterfield       return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
6053a205977SJon Chesterfield     };
6063a205977SJon Chesterfield     bool UsedAssumedInformation = false;
6073a205977SJon Chesterfield     return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
6083a205977SJon Chesterfield                                               UsedAssumedInformation);
6093a205977SJon Chesterfield   }
61096709823SKuter Dinel };
61196709823SKuter Dinel 
createForPosition(const IRPosition & IRP,Attributor & A)61296709823SKuter Dinel AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
61396709823SKuter Dinel                                                     Attributor &A) {
61496709823SKuter Dinel   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
61596709823SKuter Dinel     return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
61696709823SKuter Dinel   llvm_unreachable("AAAMDAttributes is only valid for function position");
61796709823SKuter Dinel }
61896709823SKuter Dinel 
619ec57b375SMatt Arsenault /// Propagate amdgpu-flat-work-group-size attribute.
620ec57b375SMatt Arsenault struct AAAMDFlatWorkGroupSize
621ec57b375SMatt Arsenault     : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
622ec57b375SMatt Arsenault   using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
AAAMDFlatWorkGroupSize__anon4805a6110111::AAAMDFlatWorkGroupSize623ec57b375SMatt Arsenault   AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
624ec57b375SMatt Arsenault       : Base(IRP, 32) {}
625ec57b375SMatt Arsenault 
626ec57b375SMatt Arsenault   /// See AbstractAttribute::getState(...).
getState__anon4805a6110111::AAAMDFlatWorkGroupSize627ec57b375SMatt Arsenault   IntegerRangeState &getState() override { return *this; }
getState__anon4805a6110111::AAAMDFlatWorkGroupSize628ec57b375SMatt Arsenault   const IntegerRangeState &getState() const override { return *this; }
629ec57b375SMatt Arsenault 
initialize__anon4805a6110111::AAAMDFlatWorkGroupSize630ec57b375SMatt Arsenault   void initialize(Attributor &A) override {
631ec57b375SMatt Arsenault     Function *F = getAssociatedFunction();
632ec57b375SMatt Arsenault     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
633ec57b375SMatt Arsenault     unsigned MinGroupSize, MaxGroupSize;
634ec57b375SMatt Arsenault     std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
635ec57b375SMatt Arsenault     intersectKnown(
636ec57b375SMatt Arsenault         ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
6376bcf1f91SMatt Arsenault 
6386bcf1f91SMatt Arsenault     if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
6396bcf1f91SMatt Arsenault       indicatePessimisticFixpoint();
640ec57b375SMatt Arsenault   }
641ec57b375SMatt Arsenault 
updateImpl__anon4805a6110111::AAAMDFlatWorkGroupSize642ec57b375SMatt Arsenault   ChangeStatus updateImpl(Attributor &A) override {
643ec57b375SMatt Arsenault     ChangeStatus Change = ChangeStatus::UNCHANGED;
644ec57b375SMatt Arsenault 
645ec57b375SMatt Arsenault     auto CheckCallSite = [&](AbstractCallSite CS) {
646ec57b375SMatt Arsenault       Function *Caller = CS.getInstruction()->getFunction();
647ec57b375SMatt Arsenault       LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
648ec57b375SMatt Arsenault                         << "->" << getAssociatedFunction()->getName() << '\n');
649ec57b375SMatt Arsenault 
650ec57b375SMatt Arsenault       const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
651ec57b375SMatt Arsenault           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
652ec57b375SMatt Arsenault 
653ec57b375SMatt Arsenault       Change |=
654ec57b375SMatt Arsenault           clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
655ec57b375SMatt Arsenault 
656ec57b375SMatt Arsenault       return true;
657ec57b375SMatt Arsenault     };
658ec57b375SMatt Arsenault 
659ec57b375SMatt Arsenault     bool AllCallSitesKnown = true;
660ec57b375SMatt Arsenault     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
661ec57b375SMatt Arsenault       return indicatePessimisticFixpoint();
662ec57b375SMatt Arsenault 
663ec57b375SMatt Arsenault     return Change;
664ec57b375SMatt Arsenault   }
665ec57b375SMatt Arsenault 
manifest__anon4805a6110111::AAAMDFlatWorkGroupSize666ec57b375SMatt Arsenault   ChangeStatus manifest(Attributor &A) override {
667ec57b375SMatt Arsenault     SmallVector<Attribute, 8> AttrList;
668ec57b375SMatt Arsenault     Function *F = getAssociatedFunction();
669ec57b375SMatt Arsenault     LLVMContext &Ctx = F->getContext();
670ec57b375SMatt Arsenault 
671ec57b375SMatt Arsenault     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
672ec57b375SMatt Arsenault     unsigned Min, Max;
673ec57b375SMatt Arsenault     std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
674ec57b375SMatt Arsenault 
675ec57b375SMatt Arsenault     // Don't add the attribute if it's the implied default.
676ec57b375SMatt Arsenault     if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
677ec57b375SMatt Arsenault       return ChangeStatus::UNCHANGED;
678ec57b375SMatt Arsenault 
679ec57b375SMatt Arsenault     SmallString<10> Buffer;
680ec57b375SMatt Arsenault     raw_svector_ostream OS(Buffer);
681ec57b375SMatt Arsenault     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
682ec57b375SMatt Arsenault 
683ec57b375SMatt Arsenault     AttrList.push_back(
684ec57b375SMatt Arsenault         Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
685ec57b375SMatt Arsenault     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
686ec57b375SMatt Arsenault                                               /* ForceReplace */ true);
687ec57b375SMatt Arsenault   }
688ec57b375SMatt Arsenault 
getAsStr__anon4805a6110111::AAAMDFlatWorkGroupSize689ec57b375SMatt Arsenault   const std::string getAsStr() const override {
690ec57b375SMatt Arsenault     std::string Str;
691ec57b375SMatt Arsenault     raw_string_ostream OS(Str);
692ec57b375SMatt Arsenault     OS << "AMDFlatWorkGroupSize[";
693ec57b375SMatt Arsenault     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
694ec57b375SMatt Arsenault     OS << ']';
695ec57b375SMatt Arsenault     return OS.str();
696ec57b375SMatt Arsenault   }
697ec57b375SMatt Arsenault 
698ec57b375SMatt Arsenault   /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAAMDFlatWorkGroupSize699ec57b375SMatt Arsenault   void trackStatistics() const override {}
700ec57b375SMatt Arsenault 
701ec57b375SMatt Arsenault   /// Create an abstract attribute view for the position \p IRP.
702ec57b375SMatt Arsenault   static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
703ec57b375SMatt Arsenault                                                    Attributor &A);
704ec57b375SMatt Arsenault 
705ec57b375SMatt Arsenault   /// See AbstractAttribute::getName()
getName__anon4805a6110111::AAAMDFlatWorkGroupSize706ec57b375SMatt Arsenault   const std::string getName() const override {
707ec57b375SMatt Arsenault     return "AAAMDFlatWorkGroupSize";
708ec57b375SMatt Arsenault   }
709ec57b375SMatt Arsenault 
710ec57b375SMatt Arsenault   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon4805a6110111::AAAMDFlatWorkGroupSize711ec57b375SMatt Arsenault   const char *getIdAddr() const override { return &ID; }
712ec57b375SMatt Arsenault 
713ec57b375SMatt Arsenault   /// This function should return true if the type of the \p AA is
714ec57b375SMatt Arsenault   /// AAAMDFlatWorkGroupSize
classof__anon4805a6110111::AAAMDFlatWorkGroupSize715ec57b375SMatt Arsenault   static bool classof(const AbstractAttribute *AA) {
716ec57b375SMatt Arsenault     return (AA->getIdAddr() == &ID);
717ec57b375SMatt Arsenault   }
718ec57b375SMatt Arsenault 
719ec57b375SMatt Arsenault   /// Unique ID (due to the unique address)
720ec57b375SMatt Arsenault   static const char ID;
721ec57b375SMatt Arsenault };
722ec57b375SMatt Arsenault 
// Storage for the class identifier. getIdAddr() and classof() compare the
// address of this object, not its value.
const char AAAMDFlatWorkGroupSize::ID = 0;
724ec57b375SMatt Arsenault 
725ec57b375SMatt Arsenault AAAMDFlatWorkGroupSize &
createForPosition(const IRPosition & IRP,Attributor & A)726ec57b375SMatt Arsenault AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
727ec57b375SMatt Arsenault                                           Attributor &A) {
728ec57b375SMatt Arsenault   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
729ec57b375SMatt Arsenault     return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
730ec57b375SMatt Arsenault   llvm_unreachable(
731ec57b375SMatt Arsenault       "AAAMDFlatWorkGroupSize is only valid for function position");
732ec57b375SMatt Arsenault }
733ec57b375SMatt Arsenault 
73496709823SKuter Dinel class AMDGPUAttributor : public ModulePass {
73596709823SKuter Dinel public:
AMDGPUAttributor()73696709823SKuter Dinel   AMDGPUAttributor() : ModulePass(ID) {}
73796709823SKuter Dinel 
73896709823SKuter Dinel   /// doInitialization - Virtual method overridden by subclasses to do
73996709823SKuter Dinel   /// any necessary initialization before any pass is run.
doInitialization(Module &)74096709823SKuter Dinel   bool doInitialization(Module &) override {
74196709823SKuter Dinel     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
74296709823SKuter Dinel     if (!TPC)
74396709823SKuter Dinel       report_fatal_error("TargetMachine is required");
74496709823SKuter Dinel 
74596709823SKuter Dinel     TM = &TPC->getTM<TargetMachine>();
74696709823SKuter Dinel     return false;
74796709823SKuter Dinel   }
74896709823SKuter Dinel 
runOnModule(Module & M)74996709823SKuter Dinel   bool runOnModule(Module &M) override {
75096709823SKuter Dinel     SetVector<Function *> Functions;
75196709823SKuter Dinel     AnalysisGetter AG;
752a77ae4aaSMatt Arsenault     for (Function &F : M) {
753a77ae4aaSMatt Arsenault       if (!F.isIntrinsic())
75496709823SKuter Dinel         Functions.insert(&F);
755a77ae4aaSMatt Arsenault     }
75696709823SKuter Dinel 
75796709823SKuter Dinel     CallGraphUpdater CGUpdater;
75896709823SKuter Dinel     BumpPtrAllocator Allocator;
75996709823SKuter Dinel     AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
76046d82e73SMatt Arsenault     DenseSet<const char *> Allowed(
761ec57b375SMatt Arsenault         {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
762*bf789b19SJohannes Doerfert          &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
763*bf789b19SJohannes Doerfert          &AAPointerInfo::ID});
76446d82e73SMatt Arsenault 
7653be3b401SJohannes Doerfert     AttributorConfig AC(CGUpdater);
7663be3b401SJohannes Doerfert     AC.Allowed = &Allowed;
7673be3b401SJohannes Doerfert     AC.IsModulePass = true;
7683be3b401SJohannes Doerfert     AC.DefaultInitializeLiveInternals = false;
7693be3b401SJohannes Doerfert 
7703be3b401SJohannes Doerfert     Attributor A(Functions, InfoCache, AC);
77196709823SKuter Dinel 
77296709823SKuter Dinel     for (Function &F : M) {
773a77ae4aaSMatt Arsenault       if (!F.isIntrinsic()) {
77496709823SKuter Dinel         A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
775f1217420SMatt Arsenault         A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
776ec57b375SMatt Arsenault         if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
777ec57b375SMatt Arsenault           A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
778ec57b375SMatt Arsenault         }
77996709823SKuter Dinel       }
780a77ae4aaSMatt Arsenault     }
78196709823SKuter Dinel 
78296709823SKuter Dinel     ChangeStatus Change = A.run();
78396709823SKuter Dinel     return Change == ChangeStatus::CHANGED;
78496709823SKuter Dinel   }
78596709823SKuter Dinel 
getPassName() const78696709823SKuter Dinel   StringRef getPassName() const override { return "AMDGPU Attributor"; }
78796709823SKuter Dinel   TargetMachine *TM;
78896709823SKuter Dinel   static char ID;
78996709823SKuter Dinel };
7909b8b1645SBenjamin Kramer } // namespace
79196709823SKuter Dinel 
79296709823SKuter Dinel char AMDGPUAttributor::ID = 0;
79396709823SKuter Dinel 
createAMDGPUAttributorPass()79496709823SKuter Dinel Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
79596709823SKuter Dinel INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
796