//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

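// AMDGPUAttributes.def is an X-macro file: AMDGPU_ATTRIBUTE is redefined
// before each include below to generate, in turn, the position enum, the bit
// mask enum, and the attribute-name table from a single list of attributes.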
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require the queue pointer on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

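  /// Get the minimum and maximum flat work group sizes for \p F, as
  /// determined by the subtarget and the function's attributes.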
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

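  /// Get the widest flat work group size range the subtarget itself supports,
  /// independent of any per-function attribute.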
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

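    // Only kernels carry an authoritative initial value for
    // "uniform-work-group-size"; other functions keep the default state and
    // have the value propagated from their callers in updateImpl().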
    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

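    // The attribute is propagated from callers to callees: clamp our state
    // against every known caller and give up if any call site is unknown.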
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

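    // Seed the known bits from attributes that are already present on the
    // function, except for those overridden by the sanitizer handling above.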
    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsQueuePtr = false;

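    // Walk the (optimistic) call edges: merge the deduced state of ordinary
    // callees, and map intrinsic callees directly to the implicit arguments
    // they require.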
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    if (!NeedsQueuePtr) {
      NeedsQueuePtr = checkForQueuePtr(A);
    }

    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

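    // Casting from the private or local address space to flat needs the
    // aperture base addresses, which are read from the queue pointer when the
    // subtarget has no aperture registers.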
    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than iterating over every
    // instruction ourselves, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, there is nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

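  /// Check whether the function may load the hostcall pointer from the
  /// implicit kernel argument area.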
  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

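  /// Check whether the function may load the heap pointer from the implicit
  /// kernel argument area. The heap pointer only exists with code object v5.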
  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    auto Pos = llvm::AMDGPU::getHeapPtrImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

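    // Kernels take their range from their own attribute or the subtarget
    // default and are not refined from call sites, so fix the state here.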
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
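    // Restrict the Attributor to the abstract attributes this pass actually
    // queries; everything else is not created.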
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

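    // Seed an abstract attribute for every non-intrinsic function; the flat
    // work group size range is only deduced for non-entry functions.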
    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)