//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

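// Mapping from each implicit kernel input to the attribute that records it as
// unused. The attributes are expressed negatively ("amdgpu-no-*") so that the
// conservative default for a function with no attribute is "may be needed".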
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id in kernels because
// they are always initialized there.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

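// Casts from the local or private address space to flat pointers need the
// aperture base addresses; on subtargets without aperture registers these are
// read via the queue ptr, so such casts may make the queue ptr required.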
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

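// Returns true if \p C is a global value in the local (LDS) or region (GDS)
// address space.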
static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

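  /// Properties of a visited constant that matter for queue ptr deduction: it
  /// is (or contains) a DS global, and/or it contains an addrspacecast from
  /// the local or private address space.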
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

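  /// Get the flat work-group size range currently assumed for \p F, taking any
  /// existing "amdgpu-flat-work-group-size" attribute into account.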
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

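  /// Get the widest flat work-group size range the subtarget supports,
  /// independent of any attribute already present on \p F.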
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Cache of the queue-ptr-relevant properties computed for each constant.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

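/// Deduces which implicit kernel inputs a function does not need. One bit is
/// tracked per entry of ImplicitArgumentMask; bits still known at the fixpoint
/// are manifested as the corresponding "amdgpu-no-*" attributes.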
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

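/// Propagates the "uniform-work-group-size" attribute from kernels down to the
/// functions they (transitively) call.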
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    for (auto Attr : ImplicitAttrs) {
      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
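        // Intersect with the callee's state: an implicit input may only stay
        // marked as unused here if the callee does not need it either.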
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every instruction
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

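    // Finally, check constant operands (e.g. addrspacecast constant
    // expressions and DS globals), which the instruction-based scan above does
    // not cover.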
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                                   ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                         ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
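    // ConstantRange is half-open, so add 1 to make MaxGroupSize inclusive.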
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

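/// Legacy-PM module pass that runs the Attributor with the AMDGPU-specific
/// abstract attributes defined above.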
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// Cache the TargetMachine from the TargetPassConfig before any passes run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
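    // Restrict the Attributor to the abstract attributes seeded below, plus
    // AACallEdges, which AAAMDAttributesFunction queries for the call graph.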
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

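    // Seed the deduction for every non-intrinsic function. The flat work-group
    // size range is only propagated into non-entry functions; kernel
    // attributes are left as they are.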
    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM = nullptr;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)