//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

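/// Returns true if an addrspacecast from address space \p SrcAS requires the
/// queue pointer: casts out of the local or private address space need the
/// corresponding aperture base, which is read from the queue pointer when the
/// subtarget has no aperture registers.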
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

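/// Returns true if \p C is a global value in the local (LDS) or region (GDS)
/// address space.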
static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

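/// InformationCache extended with the TargetMachine so the abstract attributes
/// below can query subtarget features, plus a per-constant access bitmap used
/// for queue pointer deduction.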
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

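  /// Properties of a constant that are relevant for attribute deduction: the
  /// constant addresses LDS/GDS memory, or it contains an addrspacecast from
  /// the local or private address space.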
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C, memoized in ConstantStatus.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    // Cache the result so repeated queries do not rewalk the constant tree.
    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

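/// Abstract attribute that deduces which implicit argument attributes
/// (work-item/work-group ids, dispatch/queue/implicitarg pointers) a function
/// requires.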
struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

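/// Abstract attribute that propagates the "uniform-work-group-size" attribute
/// from callers to callees.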
struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;

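/// Function-position implementation of AAAMDWorkGroupSize. Kernels fix their
/// state from the IR attribute; other functions clamp their state with that of
/// every call site.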
struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}

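/// Function-position implementation of AAAMDAttributes. Attributes are
/// collected from used intrinsics, propagated from callees, and derived from
/// addrspacecasts and constants that require the queue pointer.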
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

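    // Walk the optimistic call edges: intrinsics map directly to attribute
    // names, while ordinary callees contribute the attributes deduced for
    // them.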
    bool NeedsQueuePtr = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

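    // Fall back to scanning every instruction operand for constants that
    // reference LDS memory or contain an addrspacecast from local/private.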
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

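/// Legacy module pass that runs the Attributor with the AMDGPU-specific
/// abstract attributes on all non-intrinsic functions in the module.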
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)