//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
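// Map an intrinsic ID to the implicit input it reads. NonKernelOnly is set
// for the x-dimension IDs, which entry functions always receive; IsQueuePtr
// is set for intrinsics whose lowering reads the queue pointer.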
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    for (auto Attr : ImplicitAttrs) {
      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                                 : ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
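    // Note: the upper bound of the assumed range is exclusive (initialize()
    // intersects with [Min, Max + 1)), hence the "- 1" when comparing against
    // the subtarget maximum.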
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)