//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
  // queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
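
// Note: the mapping above is code-object-version dependent. Under code object
// V5 the queue pointer and the private_base/shared_base apertures are reached
// through implicitarg_ptr offsets, which is why several cases set
// NeedsImplicit or return IMPLICIT_ARG_PTR rather than QUEUE_PTR on V5.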

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};
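
// Example (hypothetical IR) of why getConstantAccess() walks constant operands
// recursively: an addrspacecast folded into a constant expression, e.g.
//
//   store i32 0, ptr addrspacecast (ptr addrspace(3) @lds to ptr)
//
// never appears as an AddrSpaceCast instruction, so it is only caught by
// needsQueuePtr() inspecting the instruction's constant operands.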

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
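
// The concrete AAs below follow the usual Attributor lifecycle: initialize()
// seeds the state from attributes already present in the IR, updateImpl() is
// re-run until a fixpoint is reached, and manifest() writes the result back as
// string function attributes. For example (hypothetical IR), given
//
//   define amdgpu_kernel void @k() "uniform-work-group-size"="true" { ... }
//   define void @helper() { ... }   ; reachable only from @k
//
// AAUniformWorkGroupSizeFunction clamps @helper's state against all of its
// callers, so @helper also ends up with "uniform-work-group-size"="true".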

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
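
// AAAMDAttributesFunction tracks which implicit kernel inputs (queue_ptr,
// implicitarg_ptr, workitem/workgroup IDs, hostcall buffer, ...) a function
// can be proven not to need. The assumed bits start fully set, are cleared as
// uses are discovered (directly via intrinsics, transitively via callees, or
// via address space casts and implicitarg_ptr loads), and whatever survives is
// manifested as the corresponding attribute strings from AMDGPUAttributes.def.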

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if (IsNonEntryFunc || !NonKernelOnly)
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
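
  // Note: manifest() below only emits attributes for known bits; the
  // Attributor framework is expected to fix the surviving assumed bits before
  // manifesting, so implicit inputs that could not be ruled out simply get no
  // attribute.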

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every instruction
    // ourselves, so try it first.

    // The queue pointer is not needed if aperture registers are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr intrinsic whose result is
    // used to retrieve the implicit kernel argument described by \p OAS (e.g.
    // the hostcall pointer). That argument is considered unused only if every
    // use of the implicitarg_ptr is a load that clearly does not touch any
    // byte in that range. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
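
// Rough example (hypothetical IR) for the propagation below: given
//
//   define amdgpu_kernel void @k() #0 { ... }   ; calls @helper
//   attributes #0 = { "amdgpu-flat-work-group-size"="64,128" }
//
// the kernel's range is pinned by its own attribute, and @helper's assumed
// range is clamped against all of its callers, so it manifests
// "amdgpu-flat-work-group-size"="64,128" as well (unless that matches the
// subtarget's default range, in which case no attribute is added).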

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
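
// Legacy pass manager driver below: it collects every non-intrinsic function,
// seeds the three AAs above (AAAMDFlatWorkGroupSize only on non-entry
// functions, since kernel ranges are fixed from their own attributes), runs
// the Attributor, and reports whether the module changed.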

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)