196709823SKuter Dinel //===- AMDGPUAttributor.cpp -----------------------------------------------===//
296709823SKuter Dinel //
396709823SKuter Dinel // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
496709823SKuter Dinel // See https://llvm.org/LICENSE.txt for license information.
596709823SKuter Dinel // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
696709823SKuter Dinel //
796709823SKuter Dinel //===----------------------------------------------------------------------===//
896709823SKuter Dinel //
996709823SKuter Dinel /// \file This pass uses Attributor framework to deduce AMDGPU attributes.
1096709823SKuter Dinel //
1196709823SKuter Dinel //===----------------------------------------------------------------------===//
1296709823SKuter Dinel
1396709823SKuter Dinel #include "AMDGPU.h"
1496709823SKuter Dinel #include "GCNSubtarget.h"
15d8f99bb6SSameer Sahasrabuddhe #include "Utils/AMDGPUBaseInfo.h"
1696709823SKuter Dinel #include "llvm/CodeGen/TargetPassConfig.h"
1796709823SKuter Dinel #include "llvm/IR/IntrinsicsAMDGPU.h"
1896709823SKuter Dinel #include "llvm/IR/IntrinsicsR600.h"
1996709823SKuter Dinel #include "llvm/Target/TargetMachine.h"
2096709823SKuter Dinel #include "llvm/Transforms/IPO/Attributor.h"
2196709823SKuter Dinel
2296709823SKuter Dinel #define DEBUG_TYPE "amdgpu-attributor"
2396709823SKuter Dinel
2496709823SKuter Dinel using namespace llvm;
2596709823SKuter Dinel
2602a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
27088cc636SMatt Arsenault
2802a2e46fSSameer Sahasrabuddhe enum ImplicitArgumentPositions {
2902a2e46fSSameer Sahasrabuddhe #include "AMDGPUAttributes.def"
3002a2e46fSSameer Sahasrabuddhe LAST_ARG_POS
31088cc636SMatt Arsenault };
32088cc636SMatt Arsenault
3302a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3402a2e46fSSameer Sahasrabuddhe
3502a2e46fSSameer Sahasrabuddhe enum ImplicitArgumentMask {
3602a2e46fSSameer Sahasrabuddhe NOT_IMPLICIT_INPUT = 0,
3702a2e46fSSameer Sahasrabuddhe #include "AMDGPUAttributes.def"
3802a2e46fSSameer Sahasrabuddhe ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
3902a2e46fSSameer Sahasrabuddhe };
4002a2e46fSSameer Sahasrabuddhe
4102a2e46fSSameer Sahasrabuddhe #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
42088cc636SMatt Arsenault static constexpr std::pair<ImplicitArgumentMask,
43088cc636SMatt Arsenault StringLiteral> ImplicitAttrs[] = {
4402a2e46fSSameer Sahasrabuddhe #include "AMDGPUAttributes.def"
45088cc636SMatt Arsenault };
4696709823SKuter Dinel
4796709823SKuter Dinel // We do not need to note the x workitem or workgroup id because they are always
4896709823SKuter Dinel // initialized.
4996709823SKuter Dinel //
5096709823SKuter Dinel // TODO: We should not add the attributes if the known compile time workgroup
5196709823SKuter Dinel // size is 1 for y/z.
52088cc636SMatt Arsenault static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID,bool & NonKernelOnly,bool & NeedsImplicit,bool HasApertureRegs,bool SupportsGetDoorBellID)530f20a35bSChangpeng Fang intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
540f20a35bSChangpeng Fang bool HasApertureRegs, bool SupportsGetDoorBellID) {
550f20a35bSChangpeng Fang unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
5696709823SKuter Dinel switch (ID) {
5796709823SKuter Dinel case Intrinsic::amdgcn_workitem_id_x:
5896709823SKuter Dinel NonKernelOnly = true;
59088cc636SMatt Arsenault return WORKITEM_ID_X;
6096709823SKuter Dinel case Intrinsic::amdgcn_workgroup_id_x:
6196709823SKuter Dinel NonKernelOnly = true;
62088cc636SMatt Arsenault return WORKGROUP_ID_X;
6396709823SKuter Dinel case Intrinsic::amdgcn_workitem_id_y:
6496709823SKuter Dinel case Intrinsic::r600_read_tidig_y:
65088cc636SMatt Arsenault return WORKITEM_ID_Y;
6696709823SKuter Dinel case Intrinsic::amdgcn_workitem_id_z:
6796709823SKuter Dinel case Intrinsic::r600_read_tidig_z:
68088cc636SMatt Arsenault return WORKITEM_ID_Z;
6996709823SKuter Dinel case Intrinsic::amdgcn_workgroup_id_y:
7096709823SKuter Dinel case Intrinsic::r600_read_tgid_y:
71088cc636SMatt Arsenault return WORKGROUP_ID_Y;
7296709823SKuter Dinel case Intrinsic::amdgcn_workgroup_id_z:
7396709823SKuter Dinel case Intrinsic::r600_read_tgid_z:
74088cc636SMatt Arsenault return WORKGROUP_ID_Z;
753a205977SJon Chesterfield case Intrinsic::amdgcn_lds_kernel_id:
763a205977SJon Chesterfield return LDS_KERNEL_ID;
7796709823SKuter Dinel case Intrinsic::amdgcn_dispatch_ptr:
78088cc636SMatt Arsenault return DISPATCH_PTR;
7996709823SKuter Dinel case Intrinsic::amdgcn_dispatch_id:
80088cc636SMatt Arsenault return DISPATCH_ID;
8196709823SKuter Dinel case Intrinsic::amdgcn_implicitarg_ptr:
82088cc636SMatt Arsenault return IMPLICIT_ARG_PTR;
830f20a35bSChangpeng Fang // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
840f20a35bSChangpeng Fang // queue_ptr.
8596709823SKuter Dinel case Intrinsic::amdgcn_queue_ptr:
860f20a35bSChangpeng Fang NeedsImplicit = (CodeObjectVersion == 5);
870f20a35bSChangpeng Fang return QUEUE_PTR;
8896709823SKuter Dinel case Intrinsic::amdgcn_is_shared:
8996709823SKuter Dinel case Intrinsic::amdgcn_is_private:
900f20a35bSChangpeng Fang if (HasApertureRegs)
910f20a35bSChangpeng Fang return NOT_IMPLICIT_INPUT;
920f20a35bSChangpeng Fang // Under V5, we need implicitarg_ptr + offsets to access private_base or
930f20a35bSChangpeng Fang // shared_base. For pre-V5, however, need to access them through queue_ptr +
940f20a35bSChangpeng Fang // offsets.
950f20a35bSChangpeng Fang return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
9696709823SKuter Dinel case Intrinsic::trap:
970f20a35bSChangpeng Fang if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
980f20a35bSChangpeng Fang return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
990f20a35bSChangpeng Fang NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5.
100088cc636SMatt Arsenault return QUEUE_PTR;
10196709823SKuter Dinel default:
102088cc636SMatt Arsenault return NOT_IMPLICIT_INPUT;
10396709823SKuter Dinel }
10496709823SKuter Dinel }
10596709823SKuter Dinel
castRequiresQueuePtr(unsigned SrcAS)10696709823SKuter Dinel static bool castRequiresQueuePtr(unsigned SrcAS) {
10796709823SKuter Dinel return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
10896709823SKuter Dinel }
10996709823SKuter Dinel
isDSAddress(const Constant * C)11096709823SKuter Dinel static bool isDSAddress(const Constant *C) {
11196709823SKuter Dinel const GlobalValue *GV = dyn_cast<GlobalValue>(C);
11296709823SKuter Dinel if (!GV)
11396709823SKuter Dinel return false;
11496709823SKuter Dinel unsigned AS = GV->getAddressSpace();
11596709823SKuter Dinel return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
11696709823SKuter Dinel }
11796709823SKuter Dinel
1180eebe2e3SMatt Arsenault /// Returns true if the function requires the implicit argument be passed
1190eebe2e3SMatt Arsenault /// regardless of the function contents.
funcRequiresHostcallPtr(const Function & F)120d8f99bb6SSameer Sahasrabuddhe static bool funcRequiresHostcallPtr(const Function &F) {
1210eebe2e3SMatt Arsenault // Sanitizers require the hostcall buffer passed in the implicit arguments.
1220eebe2e3SMatt Arsenault return F.hasFnAttribute(Attribute::SanitizeAddress) ||
1230eebe2e3SMatt Arsenault F.hasFnAttribute(Attribute::SanitizeThread) ||
1240eebe2e3SMatt Arsenault F.hasFnAttribute(Attribute::SanitizeMemory) ||
1250eebe2e3SMatt Arsenault F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1260eebe2e3SMatt Arsenault F.hasFnAttribute(Attribute::SanitizeMemTag);
1270eebe2e3SMatt Arsenault }
1280eebe2e3SMatt Arsenault
1299b8b1645SBenjamin Kramer namespace {
13096709823SKuter Dinel class AMDGPUInformationCache : public InformationCache {
13196709823SKuter Dinel public:
AMDGPUInformationCache(const Module & M,AnalysisGetter & AG,BumpPtrAllocator & Allocator,SetVector<Function * > * CGSCC,TargetMachine & TM)13296709823SKuter Dinel AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
13396709823SKuter Dinel BumpPtrAllocator &Allocator,
13496709823SKuter Dinel SetVector<Function *> *CGSCC, TargetMachine &TM)
13596709823SKuter Dinel : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
13696709823SKuter Dinel TargetMachine &TM;
13796709823SKuter Dinel
13896709823SKuter Dinel enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
13996709823SKuter Dinel
14096709823SKuter Dinel /// Check if the subtarget has aperture regs.
hasApertureRegs(Function & F)14196709823SKuter Dinel bool hasApertureRegs(Function &F) {
14296709823SKuter Dinel const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
14396709823SKuter Dinel return ST.hasApertureRegs();
14496709823SKuter Dinel }
14596709823SKuter Dinel
1460f20a35bSChangpeng Fang /// Check if the subtarget supports GetDoorbellID.
supportsGetDoorbellID(Function & F)1470f20a35bSChangpeng Fang bool supportsGetDoorbellID(Function &F) {
1480f20a35bSChangpeng Fang const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1490f20a35bSChangpeng Fang return ST.supportsGetDoorbellID();
1500f20a35bSChangpeng Fang }
1510f20a35bSChangpeng Fang
getFlatWorkGroupSizes(const Function & F)152ec57b375SMatt Arsenault std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
153ec57b375SMatt Arsenault const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
154ec57b375SMatt Arsenault return ST.getFlatWorkGroupSizes(F);
155ec57b375SMatt Arsenault }
156ec57b375SMatt Arsenault
157ec57b375SMatt Arsenault std::pair<unsigned, unsigned>
getMaximumFlatWorkGroupRange(const Function & F)158ec57b375SMatt Arsenault getMaximumFlatWorkGroupRange(const Function &F) {
159ec57b375SMatt Arsenault const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
160ec57b375SMatt Arsenault return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
161ec57b375SMatt Arsenault }
162ec57b375SMatt Arsenault
16396709823SKuter Dinel private:
16402a2e46fSSameer Sahasrabuddhe /// Check if the ConstantExpr \p CE requires the queue pointer.
visitConstExpr(const ConstantExpr * CE)16596709823SKuter Dinel static bool visitConstExpr(const ConstantExpr *CE) {
16696709823SKuter Dinel if (CE->getOpcode() == Instruction::AddrSpaceCast) {
16796709823SKuter Dinel unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
16896709823SKuter Dinel return castRequiresQueuePtr(SrcAS);
16996709823SKuter Dinel }
17096709823SKuter Dinel return false;
17196709823SKuter Dinel }
17296709823SKuter Dinel
17396709823SKuter Dinel /// Get the constant access bitmap for \p C.
getConstantAccess(const Constant * C)17496709823SKuter Dinel uint8_t getConstantAccess(const Constant *C) {
17596709823SKuter Dinel auto It = ConstantStatus.find(C);
17696709823SKuter Dinel if (It != ConstantStatus.end())
17796709823SKuter Dinel return It->second;
17896709823SKuter Dinel
17996709823SKuter Dinel uint8_t Result = 0;
18096709823SKuter Dinel if (isDSAddress(C))
18196709823SKuter Dinel Result = DS_GLOBAL;
18296709823SKuter Dinel
18396709823SKuter Dinel if (const auto *CE = dyn_cast<ConstantExpr>(C))
18496709823SKuter Dinel if (visitConstExpr(CE))
18596709823SKuter Dinel Result |= ADDR_SPACE_CAST;
18696709823SKuter Dinel
18796709823SKuter Dinel for (const Use &U : C->operands()) {
18896709823SKuter Dinel const auto *OpC = dyn_cast<Constant>(U);
18996709823SKuter Dinel if (!OpC)
19096709823SKuter Dinel continue;
19196709823SKuter Dinel
19296709823SKuter Dinel Result |= getConstantAccess(OpC);
19396709823SKuter Dinel }
19496709823SKuter Dinel return Result;
19596709823SKuter Dinel }
19696709823SKuter Dinel
19796709823SKuter Dinel public:
19802a2e46fSSameer Sahasrabuddhe /// Returns true if \p Fn needs the queue pointer because of \p C.
needsQueuePtr(const Constant * C,Function & Fn)19996709823SKuter Dinel bool needsQueuePtr(const Constant *C, Function &Fn) {
20096709823SKuter Dinel bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
20196709823SKuter Dinel bool HasAperture = hasApertureRegs(Fn);
20296709823SKuter Dinel
20396709823SKuter Dinel // No need to explore the constants.
20496709823SKuter Dinel if (!IsNonEntryFunc && HasAperture)
20596709823SKuter Dinel return false;
20696709823SKuter Dinel
20796709823SKuter Dinel uint8_t Access = getConstantAccess(C);
20896709823SKuter Dinel
20996709823SKuter Dinel // We need to trap on DS globals in non-entry functions.
21096709823SKuter Dinel if (IsNonEntryFunc && (Access & DS_GLOBAL))
21196709823SKuter Dinel return true;
21296709823SKuter Dinel
21396709823SKuter Dinel return !HasAperture && (Access & ADDR_SPACE_CAST);
21496709823SKuter Dinel }
21596709823SKuter Dinel
21696709823SKuter Dinel private:
21702a2e46fSSameer Sahasrabuddhe /// Used to determine if the Constant needs the queue pointer.
21896709823SKuter Dinel DenseMap<const Constant *, uint8_t> ConstantStatus;
21996709823SKuter Dinel };
22096709823SKuter Dinel
221088cc636SMatt Arsenault struct AAAMDAttributes : public StateWrapper<
222088cc636SMatt Arsenault BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
223088cc636SMatt Arsenault using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
224088cc636SMatt Arsenault AbstractAttribute>;
225088cc636SMatt Arsenault
AAAMDAttributes__anon4805a6110111::AAAMDAttributes22696709823SKuter Dinel AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
22796709823SKuter Dinel
22896709823SKuter Dinel /// Create an abstract attribute view for the position \p IRP.
22996709823SKuter Dinel static AAAMDAttributes &createForPosition(const IRPosition &IRP,
23096709823SKuter Dinel Attributor &A);
23196709823SKuter Dinel
23296709823SKuter Dinel /// See AbstractAttribute::getName().
getName__anon4805a6110111::AAAMDAttributes23396709823SKuter Dinel const std::string getName() const override { return "AAAMDAttributes"; }
23496709823SKuter Dinel
23596709823SKuter Dinel /// See AbstractAttribute::getIdAddr().
getIdAddr__anon4805a6110111::AAAMDAttributes23696709823SKuter Dinel const char *getIdAddr() const override { return &ID; }
23796709823SKuter Dinel
23896709823SKuter Dinel /// This function should return true if the type of the \p AA is
23996709823SKuter Dinel /// AAAMDAttributes.
classof__anon4805a6110111::AAAMDAttributes24096709823SKuter Dinel static bool classof(const AbstractAttribute *AA) {
24196709823SKuter Dinel return (AA->getIdAddr() == &ID);
24296709823SKuter Dinel }
24396709823SKuter Dinel
24496709823SKuter Dinel /// Unique ID (due to the unique address)
24596709823SKuter Dinel static const char ID;
24696709823SKuter Dinel };
24796709823SKuter Dinel const char AAAMDAttributes::ID = 0;
24896709823SKuter Dinel
249f1217420SMatt Arsenault struct AAUniformWorkGroupSize
25096709823SKuter Dinel : public StateWrapper<BooleanState, AbstractAttribute> {
25196709823SKuter Dinel using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAUniformWorkGroupSize__anon4805a6110111::AAUniformWorkGroupSize252f1217420SMatt Arsenault AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
25396709823SKuter Dinel
25496709823SKuter Dinel /// Create an abstract attribute view for the position \p IRP.
255f1217420SMatt Arsenault static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
25696709823SKuter Dinel Attributor &A);
25796709823SKuter Dinel
25896709823SKuter Dinel /// See AbstractAttribute::getName().
getName__anon4805a6110111::AAUniformWorkGroupSize259f1217420SMatt Arsenault const std::string getName() const override {
260f1217420SMatt Arsenault return "AAUniformWorkGroupSize";
261f1217420SMatt Arsenault }
26296709823SKuter Dinel
26396709823SKuter Dinel /// See AbstractAttribute::getIdAddr().
getIdAddr__anon4805a6110111::AAUniformWorkGroupSize26496709823SKuter Dinel const char *getIdAddr() const override { return &ID; }
26596709823SKuter Dinel
26696709823SKuter Dinel /// This function should return true if the type of the \p AA is
26796709823SKuter Dinel /// AAAMDAttributes.
classof__anon4805a6110111::AAUniformWorkGroupSize26896709823SKuter Dinel static bool classof(const AbstractAttribute *AA) {
26996709823SKuter Dinel return (AA->getIdAddr() == &ID);
27096709823SKuter Dinel }
27196709823SKuter Dinel
27296709823SKuter Dinel /// Unique ID (due to the unique address)
27396709823SKuter Dinel static const char ID;
27496709823SKuter Dinel };
275f1217420SMatt Arsenault const char AAUniformWorkGroupSize::ID = 0;
27696709823SKuter Dinel
277f1217420SMatt Arsenault struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
AAUniformWorkGroupSizeFunction__anon4805a6110111::AAUniformWorkGroupSizeFunction278f1217420SMatt Arsenault AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
279f1217420SMatt Arsenault : AAUniformWorkGroupSize(IRP, A) {}
28096709823SKuter Dinel
initialize__anon4805a6110111::AAUniformWorkGroupSizeFunction28196709823SKuter Dinel void initialize(Attributor &A) override {
28296709823SKuter Dinel Function *F = getAssociatedFunction();
28396709823SKuter Dinel CallingConv::ID CC = F->getCallingConv();
28496709823SKuter Dinel
28596709823SKuter Dinel if (CC != CallingConv::AMDGPU_KERNEL)
28696709823SKuter Dinel return;
28796709823SKuter Dinel
28896709823SKuter Dinel bool InitialValue = false;
28996709823SKuter Dinel if (F->hasFnAttribute("uniform-work-group-size"))
29096709823SKuter Dinel InitialValue = F->getFnAttribute("uniform-work-group-size")
29196709823SKuter Dinel .getValueAsString()
29296709823SKuter Dinel .equals("true");
29396709823SKuter Dinel
29496709823SKuter Dinel if (InitialValue)
29596709823SKuter Dinel indicateOptimisticFixpoint();
29696709823SKuter Dinel else
29796709823SKuter Dinel indicatePessimisticFixpoint();
29896709823SKuter Dinel }
29996709823SKuter Dinel
updateImpl__anon4805a6110111::AAUniformWorkGroupSizeFunction30096709823SKuter Dinel ChangeStatus updateImpl(Attributor &A) override {
30196709823SKuter Dinel ChangeStatus Change = ChangeStatus::UNCHANGED;
30296709823SKuter Dinel
30396709823SKuter Dinel auto CheckCallSite = [&](AbstractCallSite CS) {
30496709823SKuter Dinel Function *Caller = CS.getInstruction()->getFunction();
305f1217420SMatt Arsenault LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
306edb05d55SAlexander Belyaev << "->" << getAssociatedFunction()->getName() << "\n");
30796709823SKuter Dinel
308f1217420SMatt Arsenault const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
30996709823SKuter Dinel *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
31096709823SKuter Dinel
31196709823SKuter Dinel Change = Change | clampStateAndIndicateChange(this->getState(),
31296709823SKuter Dinel CallerInfo.getState());
31396709823SKuter Dinel
31496709823SKuter Dinel return true;
31596709823SKuter Dinel };
31696709823SKuter Dinel
31796709823SKuter Dinel bool AllCallSitesKnown = true;
31896709823SKuter Dinel if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
3194132dc91SMatt Arsenault return indicatePessimisticFixpoint();
32096709823SKuter Dinel
32196709823SKuter Dinel return Change;
32296709823SKuter Dinel }
32396709823SKuter Dinel
manifest__anon4805a6110111::AAUniformWorkGroupSizeFunction32496709823SKuter Dinel ChangeStatus manifest(Attributor &A) override {
32596709823SKuter Dinel SmallVector<Attribute, 8> AttrList;
32696709823SKuter Dinel LLVMContext &Ctx = getAssociatedFunction()->getContext();
32796709823SKuter Dinel
32896709823SKuter Dinel AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
32996709823SKuter Dinel getAssumed() ? "true" : "false"));
33096709823SKuter Dinel return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
33196709823SKuter Dinel /* ForceReplace */ true);
33296709823SKuter Dinel }
33396709823SKuter Dinel
isValidState__anon4805a6110111::AAUniformWorkGroupSizeFunction33496709823SKuter Dinel bool isValidState() const override {
33596709823SKuter Dinel // This state is always valid, even when the state is false.
33696709823SKuter Dinel return true;
33796709823SKuter Dinel }
33896709823SKuter Dinel
getAsStr__anon4805a6110111::AAUniformWorkGroupSizeFunction33996709823SKuter Dinel const std::string getAsStr() const override {
34096709823SKuter Dinel return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
34196709823SKuter Dinel }
34296709823SKuter Dinel
34396709823SKuter Dinel /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAUniformWorkGroupSizeFunction34496709823SKuter Dinel void trackStatistics() const override {}
34596709823SKuter Dinel };
34696709823SKuter Dinel
347f1217420SMatt Arsenault AAUniformWorkGroupSize &
createForPosition(const IRPosition & IRP,Attributor & A)348f1217420SMatt Arsenault AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
34996709823SKuter Dinel Attributor &A) {
35096709823SKuter Dinel if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
351f1217420SMatt Arsenault return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
352f1217420SMatt Arsenault llvm_unreachable(
353f1217420SMatt Arsenault "AAUniformWorkGroupSize is only valid for function position");
35496709823SKuter Dinel }
35596709823SKuter Dinel
35696709823SKuter Dinel struct AAAMDAttributesFunction : public AAAMDAttributes {
AAAMDAttributesFunction__anon4805a6110111::AAAMDAttributesFunction35796709823SKuter Dinel AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
35896709823SKuter Dinel : AAAMDAttributes(IRP, A) {}
35996709823SKuter Dinel
initialize__anon4805a6110111::AAAMDAttributesFunction36096709823SKuter Dinel void initialize(Attributor &A) override {
36196709823SKuter Dinel Function *F = getAssociatedFunction();
3620eebe2e3SMatt Arsenault
3630eebe2e3SMatt Arsenault // If the function requires the implicit arg pointer due to sanitizers,
3640eebe2e3SMatt Arsenault // assume it's needed even if explicitly marked as not requiring it.
365d8f99bb6SSameer Sahasrabuddhe const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
366d8f99bb6SSameer Sahasrabuddhe if (NeedsHostcall) {
3670eebe2e3SMatt Arsenault removeAssumedBits(IMPLICIT_ARG_PTR);
368d8f99bb6SSameer Sahasrabuddhe removeAssumedBits(HOSTCALL_PTR);
369d8f99bb6SSameer Sahasrabuddhe }
3700eebe2e3SMatt Arsenault
371088cc636SMatt Arsenault for (auto Attr : ImplicitAttrs) {
372d8f99bb6SSameer Sahasrabuddhe if (NeedsHostcall &&
373d8f99bb6SSameer Sahasrabuddhe (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
3740eebe2e3SMatt Arsenault continue;
3750eebe2e3SMatt Arsenault
376088cc636SMatt Arsenault if (F->hasFnAttribute(Attr.second))
377088cc636SMatt Arsenault addKnownBits(Attr.first);
378088cc636SMatt Arsenault }
379088cc636SMatt Arsenault
380088cc636SMatt Arsenault if (F->isDeclaration())
381088cc636SMatt Arsenault return;
38296709823SKuter Dinel
38396709823SKuter Dinel // Ignore functions with graphics calling conventions, these are currently
38496709823SKuter Dinel // not allowed to have kernel arguments.
38596709823SKuter Dinel if (AMDGPU::isGraphics(F->getCallingConv())) {
38696709823SKuter Dinel indicatePessimisticFixpoint();
38796709823SKuter Dinel return;
38896709823SKuter Dinel }
38996709823SKuter Dinel }
39096709823SKuter Dinel
updateImpl__anon4805a6110111::AAAMDAttributesFunction39196709823SKuter Dinel ChangeStatus updateImpl(Attributor &A) override {
39296709823SKuter Dinel Function *F = getAssociatedFunction();
393088cc636SMatt Arsenault // The current assumed state used to determine a change.
394088cc636SMatt Arsenault auto OrigAssumed = getAssumed();
39596709823SKuter Dinel
39696709823SKuter Dinel // Check for Intrinsics and propagate attributes.
39796709823SKuter Dinel const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
39896709823SKuter Dinel *this, this->getIRPosition(), DepClassTy::REQUIRED);
399088cc636SMatt Arsenault if (AAEdges.hasNonAsmUnknownCallee())
400088cc636SMatt Arsenault return indicatePessimisticFixpoint();
40196709823SKuter Dinel
402088cc636SMatt Arsenault bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
40396709823SKuter Dinel
4040f20a35bSChangpeng Fang bool NeedsImplicit = false;
4050f20a35bSChangpeng Fang auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
4060f20a35bSChangpeng Fang bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
4070f20a35bSChangpeng Fang bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
408088cc636SMatt Arsenault
40996709823SKuter Dinel for (Function *Callee : AAEdges.getOptimisticEdges()) {
41096709823SKuter Dinel Intrinsic::ID IID = Callee->getIntrinsicID();
411088cc636SMatt Arsenault if (IID == Intrinsic::not_intrinsic) {
412088cc636SMatt Arsenault const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
413088cc636SMatt Arsenault *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
414088cc636SMatt Arsenault *this &= AAAMD;
41596709823SKuter Dinel continue;
41696709823SKuter Dinel }
41796709823SKuter Dinel
418088cc636SMatt Arsenault bool NonKernelOnly = false;
419088cc636SMatt Arsenault ImplicitArgumentMask AttrMask =
4200f20a35bSChangpeng Fang intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
4210f20a35bSChangpeng Fang HasApertureRegs, SupportsGetDoorbellID);
422088cc636SMatt Arsenault if (AttrMask != NOT_IMPLICIT_INPUT) {
423088cc636SMatt Arsenault if ((IsNonEntryFunc || !NonKernelOnly))
424088cc636SMatt Arsenault removeAssumedBits(AttrMask);
425088cc636SMatt Arsenault }
42696709823SKuter Dinel }
42796709823SKuter Dinel
4280f20a35bSChangpeng Fang // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
4290f20a35bSChangpeng Fang if (NeedsImplicit)
4300f20a35bSChangpeng Fang removeAssumedBits(IMPLICIT_ARG_PTR);
431c6a6b579SSameer Sahasrabuddhe
4320f20a35bSChangpeng Fang if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
4330f20a35bSChangpeng Fang // Under V5, we need implicitarg_ptr + offsets to access private_base or
4340f20a35bSChangpeng Fang // shared_base. We do not actually need queue_ptr.
4350f20a35bSChangpeng Fang if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
4360f20a35bSChangpeng Fang removeAssumedBits(IMPLICIT_ARG_PTR);
4370f20a35bSChangpeng Fang else
438088cc636SMatt Arsenault removeAssumedBits(QUEUE_PTR);
43996709823SKuter Dinel }
44096709823SKuter Dinel
4418edaf259SChangpeng Fang if (funcRetrievesMultigridSyncArg(A)) {
4428edaf259SChangpeng Fang assert(!isAssumed(IMPLICIT_ARG_PTR) &&
4438edaf259SChangpeng Fang "multigrid_sync_arg needs implicitarg_ptr");
4448edaf259SChangpeng Fang removeAssumedBits(MULTIGRID_SYNC_ARG);
4458edaf259SChangpeng Fang }
4468edaf259SChangpeng Fang
447d8f99bb6SSameer Sahasrabuddhe if (funcRetrievesHostcallPtr(A)) {
448ca62b1dbSChangpeng Fang assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
449d8f99bb6SSameer Sahasrabuddhe removeAssumedBits(HOSTCALL_PTR);
450d8f99bb6SSameer Sahasrabuddhe }
451d8f99bb6SSameer Sahasrabuddhe
452ca62b1dbSChangpeng Fang if (funcRetrievesHeapPtr(A)) {
453ca62b1dbSChangpeng Fang assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
454ca62b1dbSChangpeng Fang removeAssumedBits(HEAP_PTR);
455ca62b1dbSChangpeng Fang }
456ca62b1dbSChangpeng Fang
4570f20a35bSChangpeng Fang if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
4580f20a35bSChangpeng Fang assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
4590f20a35bSChangpeng Fang removeAssumedBits(QUEUE_PTR);
4600f20a35bSChangpeng Fang }
4610f20a35bSChangpeng Fang
4623a205977SJon Chesterfield if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
4633a205977SJon Chesterfield removeAssumedBits(LDS_KERNEL_ID);
4643a205977SJon Chesterfield }
4653a205977SJon Chesterfield
46602a2e46fSSameer Sahasrabuddhe return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
46702a2e46fSSameer Sahasrabuddhe : ChangeStatus::UNCHANGED;
46896709823SKuter Dinel }
46996709823SKuter Dinel
manifest__anon4805a6110111::AAAMDAttributesFunction47096709823SKuter Dinel ChangeStatus manifest(Attributor &A) override {
47196709823SKuter Dinel SmallVector<Attribute, 8> AttrList;
47296709823SKuter Dinel LLVMContext &Ctx = getAssociatedFunction()->getContext();
47396709823SKuter Dinel
474088cc636SMatt Arsenault for (auto Attr : ImplicitAttrs) {
475088cc636SMatt Arsenault if (isKnown(Attr.first))
476088cc636SMatt Arsenault AttrList.push_back(Attribute::get(Ctx, Attr.second));
477088cc636SMatt Arsenault }
47896709823SKuter Dinel
47996709823SKuter Dinel return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
48096709823SKuter Dinel /* ForceReplace */ true);
48196709823SKuter Dinel }
48296709823SKuter Dinel
getAsStr__anon4805a6110111::AAAMDAttributesFunction48396709823SKuter Dinel const std::string getAsStr() const override {
484088cc636SMatt Arsenault std::string Str;
485088cc636SMatt Arsenault raw_string_ostream OS(Str);
486088cc636SMatt Arsenault OS << "AMDInfo[";
487088cc636SMatt Arsenault for (auto Attr : ImplicitAttrs)
488088cc636SMatt Arsenault OS << ' ' << Attr.second;
489088cc636SMatt Arsenault OS << " ]";
490088cc636SMatt Arsenault return OS.str();
49196709823SKuter Dinel }
49296709823SKuter Dinel
49396709823SKuter Dinel /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAAMDAttributesFunction49496709823SKuter Dinel void trackStatistics() const override {}
49502a2e46fSSameer Sahasrabuddhe
49602a2e46fSSameer Sahasrabuddhe private:
checkForQueuePtr__anon4805a6110111::AAAMDAttributesFunction49702a2e46fSSameer Sahasrabuddhe bool checkForQueuePtr(Attributor &A) {
49802a2e46fSSameer Sahasrabuddhe Function *F = getAssociatedFunction();
49902a2e46fSSameer Sahasrabuddhe bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
50002a2e46fSSameer Sahasrabuddhe
50102a2e46fSSameer Sahasrabuddhe auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
50202a2e46fSSameer Sahasrabuddhe
50302a2e46fSSameer Sahasrabuddhe bool NeedsQueuePtr = false;
50402a2e46fSSameer Sahasrabuddhe
50502a2e46fSSameer Sahasrabuddhe auto CheckAddrSpaceCasts = [&](Instruction &I) {
50602a2e46fSSameer Sahasrabuddhe unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
50702a2e46fSSameer Sahasrabuddhe if (castRequiresQueuePtr(SrcAS)) {
50802a2e46fSSameer Sahasrabuddhe NeedsQueuePtr = true;
50902a2e46fSSameer Sahasrabuddhe return false;
51002a2e46fSSameer Sahasrabuddhe }
51102a2e46fSSameer Sahasrabuddhe return true;
51202a2e46fSSameer Sahasrabuddhe };
51302a2e46fSSameer Sahasrabuddhe
51402a2e46fSSameer Sahasrabuddhe bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
51502a2e46fSSameer Sahasrabuddhe
51602a2e46fSSameer Sahasrabuddhe // `checkForAllInstructions` is much more cheaper than going through all
51702a2e46fSSameer Sahasrabuddhe // instructions, try it first.
51802a2e46fSSameer Sahasrabuddhe
51902a2e46fSSameer Sahasrabuddhe // The queue pointer is not needed if aperture regs is present.
52002a2e46fSSameer Sahasrabuddhe if (!HasApertureRegs) {
52102a2e46fSSameer Sahasrabuddhe bool UsedAssumedInformation = false;
52202a2e46fSSameer Sahasrabuddhe A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
52302a2e46fSSameer Sahasrabuddhe {Instruction::AddrSpaceCast},
52402a2e46fSSameer Sahasrabuddhe UsedAssumedInformation);
52502a2e46fSSameer Sahasrabuddhe }
52602a2e46fSSameer Sahasrabuddhe
52702a2e46fSSameer Sahasrabuddhe // If we found that we need the queue pointer, nothing else to do.
52802a2e46fSSameer Sahasrabuddhe if (NeedsQueuePtr)
52902a2e46fSSameer Sahasrabuddhe return true;
53002a2e46fSSameer Sahasrabuddhe
53102a2e46fSSameer Sahasrabuddhe if (!IsNonEntryFunc && HasApertureRegs)
53202a2e46fSSameer Sahasrabuddhe return false;
53302a2e46fSSameer Sahasrabuddhe
53402a2e46fSSameer Sahasrabuddhe for (BasicBlock &BB : *F) {
53502a2e46fSSameer Sahasrabuddhe for (Instruction &I : BB) {
53602a2e46fSSameer Sahasrabuddhe for (const Use &U : I.operands()) {
53702a2e46fSSameer Sahasrabuddhe if (const auto *C = dyn_cast<Constant>(U)) {
53802a2e46fSSameer Sahasrabuddhe if (InfoCache.needsQueuePtr(C, *F))
53902a2e46fSSameer Sahasrabuddhe return true;
54002a2e46fSSameer Sahasrabuddhe }
54102a2e46fSSameer Sahasrabuddhe }
54202a2e46fSSameer Sahasrabuddhe }
54302a2e46fSSameer Sahasrabuddhe }
54402a2e46fSSameer Sahasrabuddhe
54502a2e46fSSameer Sahasrabuddhe return false;
54602a2e46fSSameer Sahasrabuddhe }
547d8f99bb6SSameer Sahasrabuddhe
funcRetrievesMultigridSyncArg__anon4805a6110111::AAAMDAttributesFunction5488edaf259SChangpeng Fang bool funcRetrievesMultigridSyncArg(Attributor &A) {
5498edaf259SChangpeng Fang auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
5508edaf259SChangpeng Fang AAPointerInfo::OffsetAndSize OAS(Pos, 8);
5518edaf259SChangpeng Fang return funcRetrievesImplicitKernelArg(A, OAS);
5528edaf259SChangpeng Fang }
5538edaf259SChangpeng Fang
funcRetrievesHostcallPtr__anon4805a6110111::AAAMDAttributesFunction554d8f99bb6SSameer Sahasrabuddhe bool funcRetrievesHostcallPtr(Attributor &A) {
555d8f99bb6SSameer Sahasrabuddhe auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
556ca62b1dbSChangpeng Fang AAPointerInfo::OffsetAndSize OAS(Pos, 8);
557ca62b1dbSChangpeng Fang return funcRetrievesImplicitKernelArg(A, OAS);
558ca62b1dbSChangpeng Fang }
559d8f99bb6SSameer Sahasrabuddhe
funcRetrievesHeapPtr__anon4805a6110111::AAAMDAttributesFunction560ca62b1dbSChangpeng Fang bool funcRetrievesHeapPtr(Attributor &A) {
561ca62b1dbSChangpeng Fang if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
562ca62b1dbSChangpeng Fang return false;
563dd5895ccSChangpeng Fang AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
564ca62b1dbSChangpeng Fang return funcRetrievesImplicitKernelArg(A, OAS);
565ca62b1dbSChangpeng Fang }
566ca62b1dbSChangpeng Fang
funcRetrievesQueuePtr__anon4805a6110111::AAAMDAttributesFunction5670f20a35bSChangpeng Fang bool funcRetrievesQueuePtr(Attributor &A) {
5680f20a35bSChangpeng Fang if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
5690f20a35bSChangpeng Fang return false;
570dd5895ccSChangpeng Fang AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
5710f20a35bSChangpeng Fang return funcRetrievesImplicitKernelArg(A, OAS);
5720f20a35bSChangpeng Fang }
5730f20a35bSChangpeng Fang
funcRetrievesImplicitKernelArg__anon4805a6110111::AAAMDAttributesFunction574ca62b1dbSChangpeng Fang bool funcRetrievesImplicitKernelArg(Attributor &A,
575ca62b1dbSChangpeng Fang AAPointerInfo::OffsetAndSize OAS) {
576d8f99bb6SSameer Sahasrabuddhe // Check if this is a call to the implicitarg_ptr builtin and it
577d8f99bb6SSameer Sahasrabuddhe // is used to retrieve the hostcall pointer. The implicit arg for
578d8f99bb6SSameer Sahasrabuddhe // hostcall is not used only if every use of the implicitarg_ptr
579d8f99bb6SSameer Sahasrabuddhe // is a load that clearly does not retrieve any byte of the
580d8f99bb6SSameer Sahasrabuddhe // hostcall pointer. We check this by tracing all the uses of the
581d8f99bb6SSameer Sahasrabuddhe // initial call to the implicitarg_ptr intrinsic.
582ca62b1dbSChangpeng Fang auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
583d8f99bb6SSameer Sahasrabuddhe auto &Call = cast<CallBase>(I);
584d8f99bb6SSameer Sahasrabuddhe if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
585d8f99bb6SSameer Sahasrabuddhe return true;
586d8f99bb6SSameer Sahasrabuddhe
587d8f99bb6SSameer Sahasrabuddhe const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
588d8f99bb6SSameer Sahasrabuddhe *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
589d8f99bb6SSameer Sahasrabuddhe
590d8f99bb6SSameer Sahasrabuddhe return PointerInfoAA.forallInterferingAccesses(
591d8f99bb6SSameer Sahasrabuddhe OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
592d8f99bb6SSameer Sahasrabuddhe return Acc.getRemoteInst()->isDroppable();
593d8f99bb6SSameer Sahasrabuddhe });
594d8f99bb6SSameer Sahasrabuddhe };
595d8f99bb6SSameer Sahasrabuddhe
596d8f99bb6SSameer Sahasrabuddhe bool UsedAssumedInformation = false;
597ca62b1dbSChangpeng Fang return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
598d8f99bb6SSameer Sahasrabuddhe UsedAssumedInformation);
599d8f99bb6SSameer Sahasrabuddhe }
6003a205977SJon Chesterfield
funcRetrievesLDSKernelId__anon4805a6110111::AAAMDAttributesFunction6013a205977SJon Chesterfield bool funcRetrievesLDSKernelId(Attributor &A) {
6023a205977SJon Chesterfield auto DoesNotRetrieve = [&](Instruction &I) {
6033a205977SJon Chesterfield auto &Call = cast<CallBase>(I);
6043a205977SJon Chesterfield return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
6053a205977SJon Chesterfield };
6063a205977SJon Chesterfield bool UsedAssumedInformation = false;
6073a205977SJon Chesterfield return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
6083a205977SJon Chesterfield UsedAssumedInformation);
6093a205977SJon Chesterfield }
61096709823SKuter Dinel };
61196709823SKuter Dinel
createForPosition(const IRPosition & IRP,Attributor & A)61296709823SKuter Dinel AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
61396709823SKuter Dinel Attributor &A) {
61496709823SKuter Dinel if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
61596709823SKuter Dinel return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
61696709823SKuter Dinel llvm_unreachable("AAAMDAttributes is only valid for function position");
61796709823SKuter Dinel }
61896709823SKuter Dinel
619ec57b375SMatt Arsenault /// Propagate amdgpu-flat-work-group-size attribute.
620ec57b375SMatt Arsenault struct AAAMDFlatWorkGroupSize
621ec57b375SMatt Arsenault : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
622ec57b375SMatt Arsenault using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
AAAMDFlatWorkGroupSize__anon4805a6110111::AAAMDFlatWorkGroupSize623ec57b375SMatt Arsenault AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
624ec57b375SMatt Arsenault : Base(IRP, 32) {}
625ec57b375SMatt Arsenault
626ec57b375SMatt Arsenault /// See AbstractAttribute::getState(...).
getState__anon4805a6110111::AAAMDFlatWorkGroupSize627ec57b375SMatt Arsenault IntegerRangeState &getState() override { return *this; }
getState__anon4805a6110111::AAAMDFlatWorkGroupSize628ec57b375SMatt Arsenault const IntegerRangeState &getState() const override { return *this; }
629ec57b375SMatt Arsenault
initialize__anon4805a6110111::AAAMDFlatWorkGroupSize630ec57b375SMatt Arsenault void initialize(Attributor &A) override {
631ec57b375SMatt Arsenault Function *F = getAssociatedFunction();
632ec57b375SMatt Arsenault auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
633ec57b375SMatt Arsenault unsigned MinGroupSize, MaxGroupSize;
634ec57b375SMatt Arsenault std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
635ec57b375SMatt Arsenault intersectKnown(
636ec57b375SMatt Arsenault ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
6376bcf1f91SMatt Arsenault
6386bcf1f91SMatt Arsenault if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
6396bcf1f91SMatt Arsenault indicatePessimisticFixpoint();
640ec57b375SMatt Arsenault }
641ec57b375SMatt Arsenault
updateImpl__anon4805a6110111::AAAMDFlatWorkGroupSize642ec57b375SMatt Arsenault ChangeStatus updateImpl(Attributor &A) override {
643ec57b375SMatt Arsenault ChangeStatus Change = ChangeStatus::UNCHANGED;
644ec57b375SMatt Arsenault
645ec57b375SMatt Arsenault auto CheckCallSite = [&](AbstractCallSite CS) {
646ec57b375SMatt Arsenault Function *Caller = CS.getInstruction()->getFunction();
647ec57b375SMatt Arsenault LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
648ec57b375SMatt Arsenault << "->" << getAssociatedFunction()->getName() << '\n');
649ec57b375SMatt Arsenault
650ec57b375SMatt Arsenault const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
651ec57b375SMatt Arsenault *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
652ec57b375SMatt Arsenault
653ec57b375SMatt Arsenault Change |=
654ec57b375SMatt Arsenault clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
655ec57b375SMatt Arsenault
656ec57b375SMatt Arsenault return true;
657ec57b375SMatt Arsenault };
658ec57b375SMatt Arsenault
659ec57b375SMatt Arsenault bool AllCallSitesKnown = true;
660ec57b375SMatt Arsenault if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
661ec57b375SMatt Arsenault return indicatePessimisticFixpoint();
662ec57b375SMatt Arsenault
663ec57b375SMatt Arsenault return Change;
664ec57b375SMatt Arsenault }
665ec57b375SMatt Arsenault
manifest__anon4805a6110111::AAAMDFlatWorkGroupSize666ec57b375SMatt Arsenault ChangeStatus manifest(Attributor &A) override {
667ec57b375SMatt Arsenault SmallVector<Attribute, 8> AttrList;
668ec57b375SMatt Arsenault Function *F = getAssociatedFunction();
669ec57b375SMatt Arsenault LLVMContext &Ctx = F->getContext();
670ec57b375SMatt Arsenault
671ec57b375SMatt Arsenault auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
672ec57b375SMatt Arsenault unsigned Min, Max;
673ec57b375SMatt Arsenault std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
674ec57b375SMatt Arsenault
675ec57b375SMatt Arsenault // Don't add the attribute if it's the implied default.
676ec57b375SMatt Arsenault if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
677ec57b375SMatt Arsenault return ChangeStatus::UNCHANGED;
678ec57b375SMatt Arsenault
679ec57b375SMatt Arsenault SmallString<10> Buffer;
680ec57b375SMatt Arsenault raw_svector_ostream OS(Buffer);
681ec57b375SMatt Arsenault OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
682ec57b375SMatt Arsenault
683ec57b375SMatt Arsenault AttrList.push_back(
684ec57b375SMatt Arsenault Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
685ec57b375SMatt Arsenault return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
686ec57b375SMatt Arsenault /* ForceReplace */ true);
687ec57b375SMatt Arsenault }
688ec57b375SMatt Arsenault
getAsStr__anon4805a6110111::AAAMDFlatWorkGroupSize689ec57b375SMatt Arsenault const std::string getAsStr() const override {
690ec57b375SMatt Arsenault std::string Str;
691ec57b375SMatt Arsenault raw_string_ostream OS(Str);
692ec57b375SMatt Arsenault OS << "AMDFlatWorkGroupSize[";
693ec57b375SMatt Arsenault OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
694ec57b375SMatt Arsenault OS << ']';
695ec57b375SMatt Arsenault return OS.str();
696ec57b375SMatt Arsenault }
697ec57b375SMatt Arsenault
698ec57b375SMatt Arsenault /// See AbstractAttribute::trackStatistics()
trackStatistics__anon4805a6110111::AAAMDFlatWorkGroupSize699ec57b375SMatt Arsenault void trackStatistics() const override {}
700ec57b375SMatt Arsenault
701ec57b375SMatt Arsenault /// Create an abstract attribute view for the position \p IRP.
702ec57b375SMatt Arsenault static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
703ec57b375SMatt Arsenault Attributor &A);
704ec57b375SMatt Arsenault
705ec57b375SMatt Arsenault /// See AbstractAttribute::getName()
getName__anon4805a6110111::AAAMDFlatWorkGroupSize706ec57b375SMatt Arsenault const std::string getName() const override {
707ec57b375SMatt Arsenault return "AAAMDFlatWorkGroupSize";
708ec57b375SMatt Arsenault }
709ec57b375SMatt Arsenault
710ec57b375SMatt Arsenault /// See AbstractAttribute::getIdAddr()
getIdAddr__anon4805a6110111::AAAMDFlatWorkGroupSize711ec57b375SMatt Arsenault const char *getIdAddr() const override { return &ID; }
712ec57b375SMatt Arsenault
713ec57b375SMatt Arsenault /// This function should return true if the type of the \p AA is
714ec57b375SMatt Arsenault /// AAAMDFlatWorkGroupSize
classof__anon4805a6110111::AAAMDFlatWorkGroupSize715ec57b375SMatt Arsenault static bool classof(const AbstractAttribute *AA) {
716ec57b375SMatt Arsenault return (AA->getIdAddr() == &ID);
717ec57b375SMatt Arsenault }
718ec57b375SMatt Arsenault
719ec57b375SMatt Arsenault /// Unique ID (due to the unique address)
720ec57b375SMatt Arsenault static const char ID;
721ec57b375SMatt Arsenault };
722ec57b375SMatt Arsenault
723ec57b375SMatt Arsenault const char AAAMDFlatWorkGroupSize::ID = 0;
724ec57b375SMatt Arsenault
725ec57b375SMatt Arsenault AAAMDFlatWorkGroupSize &
createForPosition(const IRPosition & IRP,Attributor & A)726ec57b375SMatt Arsenault AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
727ec57b375SMatt Arsenault Attributor &A) {
728ec57b375SMatt Arsenault if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
729ec57b375SMatt Arsenault return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
730ec57b375SMatt Arsenault llvm_unreachable(
731ec57b375SMatt Arsenault "AAAMDFlatWorkGroupSize is only valid for function position");
732ec57b375SMatt Arsenault }
733ec57b375SMatt Arsenault
73496709823SKuter Dinel class AMDGPUAttributor : public ModulePass {
73596709823SKuter Dinel public:
AMDGPUAttributor()73696709823SKuter Dinel AMDGPUAttributor() : ModulePass(ID) {}
73796709823SKuter Dinel
73896709823SKuter Dinel /// doInitialization - Virtual method overridden by subclasses to do
73996709823SKuter Dinel /// any necessary initialization before any pass is run.
doInitialization(Module &)74096709823SKuter Dinel bool doInitialization(Module &) override {
74196709823SKuter Dinel auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
74296709823SKuter Dinel if (!TPC)
74396709823SKuter Dinel report_fatal_error("TargetMachine is required");
74496709823SKuter Dinel
74596709823SKuter Dinel TM = &TPC->getTM<TargetMachine>();
74696709823SKuter Dinel return false;
74796709823SKuter Dinel }
74896709823SKuter Dinel
runOnModule(Module & M)74996709823SKuter Dinel bool runOnModule(Module &M) override {
75096709823SKuter Dinel SetVector<Function *> Functions;
75196709823SKuter Dinel AnalysisGetter AG;
752a77ae4aaSMatt Arsenault for (Function &F : M) {
753a77ae4aaSMatt Arsenault if (!F.isIntrinsic())
75496709823SKuter Dinel Functions.insert(&F);
755a77ae4aaSMatt Arsenault }
75696709823SKuter Dinel
75796709823SKuter Dinel CallGraphUpdater CGUpdater;
75896709823SKuter Dinel BumpPtrAllocator Allocator;
75996709823SKuter Dinel AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
76046d82e73SMatt Arsenault DenseSet<const char *> Allowed(
761ec57b375SMatt Arsenault {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
762*bf789b19SJohannes Doerfert &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
763*bf789b19SJohannes Doerfert &AAPointerInfo::ID});
76446d82e73SMatt Arsenault
7653be3b401SJohannes Doerfert AttributorConfig AC(CGUpdater);
7663be3b401SJohannes Doerfert AC.Allowed = &Allowed;
7673be3b401SJohannes Doerfert AC.IsModulePass = true;
7683be3b401SJohannes Doerfert AC.DefaultInitializeLiveInternals = false;
7693be3b401SJohannes Doerfert
7703be3b401SJohannes Doerfert Attributor A(Functions, InfoCache, AC);
77196709823SKuter Dinel
77296709823SKuter Dinel for (Function &F : M) {
773a77ae4aaSMatt Arsenault if (!F.isIntrinsic()) {
77496709823SKuter Dinel A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
775f1217420SMatt Arsenault A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
776ec57b375SMatt Arsenault if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
777ec57b375SMatt Arsenault A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
778ec57b375SMatt Arsenault }
77996709823SKuter Dinel }
780a77ae4aaSMatt Arsenault }
78196709823SKuter Dinel
78296709823SKuter Dinel ChangeStatus Change = A.run();
78396709823SKuter Dinel return Change == ChangeStatus::CHANGED;
78496709823SKuter Dinel }
78596709823SKuter Dinel
getPassName() const78696709823SKuter Dinel StringRef getPassName() const override { return "AMDGPU Attributor"; }
78796709823SKuter Dinel TargetMachine *TM;
78896709823SKuter Dinel static char ID;
78996709823SKuter Dinel };
7909b8b1645SBenjamin Kramer } // namespace
79196709823SKuter Dinel
79296709823SKuter Dinel char AMDGPUAttributor::ID = 0;
79396709823SKuter Dinel
createAMDGPUAttributorPass()79496709823SKuter Dinel Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
79596709823SKuter Dinel INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
796