//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

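// AMDGPUAttributes.def is expanded several times below. Each expansion
// redefines AMDGPU_ATTRIBUTE to stamp the same attribute table out in a
// different form: enumerator bit positions, bit-mask values, and
// (mask, attribute-string) pairs.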
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require the queue pointer on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresImplicitArgPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
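/// Target-specific information cache handed to the Attributor. It keeps the
/// TargetMachine so the abstract attributes below can query GCNSubtarget
/// properties, and it caches per-Constant queue pointer requirements.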
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

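/// Deduces which implicit kernel inputs a function requires. The state is a
/// bit mask over ImplicitArgumentMask: bits are cleared as uses of the
/// corresponding inputs are discovered, and bits that remain known are
/// manifested as the matching function attributes from AMDGPUAttributes.def.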
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

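/// Propagates the "uniform-work-group-size" function attribute from kernels
/// down through the call graph.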
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

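  // A callee only keeps "uniform-work-group-size"="true" if every known
  // caller has it; unknown call sites force a pessimistic fixpoint.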
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    for (auto Attr : ImplicitAttrs) {
      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

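  // Walk the optimistic call edges: an unknown non-asm callee gives up
  // entirely, intrinsic callees mark the implicit inputs they require, and
  // ordinary callees have their requirements merged into this state.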
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }


    NeedsQueuePtr |= checkForQueuePtr(A);
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
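  /// Returns true if this function requires the queue pointer: either it
  /// casts from the local or private address space on a subtarget without
  /// aperture registers, or one of its instructions uses a constant that
  /// needs the queue pointer.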
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // ourselves, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, there is nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

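  // Seed the known range from the subtarget's flat work group sizes for this
  // function. Entry points are fixed here; only non-entry functions are
  // refined against their callers in updateImpl.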
  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

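  // Clamp this function's range against each known caller so a callee's flat
  // work group size bounds cover those of all its callers; unknown call sites
  // force a pessimistic fixpoint.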
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

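/// Module pass driver: sets up an Attributor restricted to the AMDGPU
/// abstract attributes (plus AACallEdges) and runs it over the module.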
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

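    // Seed an abstract attribute for every non-intrinsic function. The flat
    // work group size is only seeded for non-entry functions, which derive
    // their range from their callers.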
    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)