//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

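// AMDGPUAttributes.def is expanded twice below: first to assign each implicit
// argument a bit position, then to turn those positions into a bit mask enum.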
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

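// Mapping from each implicit-argument mask bit to the corresponding attribute
// string name; used when reading pre-existing attributes and when manifesting
// deduced ones.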
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

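  /// Bits recorded per Constant: DS_GLOBAL means the constant (transitively)
  /// references an LDS or region global; ADDR_SPACE_CAST means it contains an
  /// addrspacecast from the local or private address space.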
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

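    // Set by intrinsicToAttrMask when, under code object V5, an intrinsic
    // additionally needs implicitarg_ptr.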
    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
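  /// Return true if the associated function contains an addrspacecast or a
  /// constant operand that requires the queue pointer.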
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
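    // Restrict the Attributor to the abstract attributes this pass queries so
    // the fixpoint iteration does not create unrelated AAs.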
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
         &AAPointerInfo::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
796