//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

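// Each implicit kernel input is paired with the "amdgpu-no-*" attribute that
// records the input is not needed; the attribute is manifested once the
// corresponding bit becomes known.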
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

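// Casting from the local or private address space requires the aperture base,
// which is read from the queue pointer on subtargets without aperture
// registers.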
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

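/// Returns true if \p C is a global value in the LDS (local) or region address
/// space.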
static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

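/// Target-specific Attributor information cache. Provides GCN subtarget
/// queries and determines whether a constant's use requires the queue pointer.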
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

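  /// Get the flat work group size range for \p F, as specified by its
  /// "amdgpu-flat-work-group-size" attribute or the subtarget default.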
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

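  /// Get the widest flat work group size range the subtarget itself supports,
  /// independent of any attribute on \p F.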
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

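/// Abstract attribute tracking which implicit kernel inputs (see
/// ImplicitArgumentMask) a function does not need. Known bits are manifested
/// as the corresponding "amdgpu-no-*" attributes.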
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

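/// Abstract attribute propagating the "uniform-work-group-size" attribute from
/// callers (ultimately kernels) to their callees.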
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

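/// Function-position implementation of AAUniformWorkGroupSize.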
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

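/// Function-position implementation of AAAMDAttributes.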
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    for (auto Attr : ImplicitAttrs) {
      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions manually, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                           ChangeStatus::UNCHANGED;
    }

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                                   ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
                                         ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
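/// The manifested value has the form "Min,Max", e.g.
/// "amdgpu-flat-work-group-size"="1,256".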
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

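/// Legacy module pass that runs the Attributor with the AMDGPU-specific
/// abstract attributes defined above.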
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

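  /// Seed the AMDGPU abstract attributes for every non-intrinsic function and
  /// run the Attributor to a fixpoint.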
  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)