//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

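// AMDGPUAttributes.def supplies the AMDGPU_ATTRIBUTE(Name, Str) entries; they
// are expanded three times below to build the bit positions, the bit masks,
// and the (mask, attribute string) table.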
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require the queue pointer on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument pointer to be
/// passed regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
  // Sanitizers require the hostcall buffer to be passed in the implicit
  // arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    // Cache the result so shared constants are only walked once.
    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

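/// Deduced implicit-argument attributes. The state keeps one bit per entry in
/// AMDGPUAttributes.def; updateImpl() clears the assumed bit for every
/// implicit input a use is discovered for, and manifest() emits the attribute
/// string for each bit that is still known at the end.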
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    for (auto Attr : ImplicitAttrs) {
      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // Remember the current assumed state so we can report whether it changed.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsQueuePtr = false;

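    // Propagate from callees: intersect our state with each non-intrinsic
    // callee's deduced attributes, and clear the bits for implicit inputs
    // consumed by intrinsic callees.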
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    if (!NeedsQueuePtr) {
      NeedsQueuePtr = checkForQueuePtr(A);
    }

    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, there is nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

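    // The address space cast scan above only sees instructions; also inspect
    // constant operands (e.g. addrspacecast constant expressions or DS
    // globals) that may require the queue pointer.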
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

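    // Entry points take their range from their own attribute (or the
    // subtarget default) rather than from callers, so the state can be fixed
    // immediately.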
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// Grab the TargetMachine from the TargetPassConfig before any pass runs;
  /// the subtarget queries in the information cache depend on it.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
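    // Only the abstract attributes listed here may be created by this
    // Attributor instance, which keeps the run focused on the AMDGPU
    // deductions this pass needs.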
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)