1 //===- AMDGPUAttributor.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This pass uses Attributor framework to deduce AMDGPU attributes. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "GCNSubtarget.h" 15 #include "llvm/CodeGen/TargetPassConfig.h" 16 #include "llvm/IR/IntrinsicsAMDGPU.h" 17 #include "llvm/IR/IntrinsicsR600.h" 18 #include "llvm/Target/TargetMachine.h" 19 #include "llvm/Transforms/IPO/Attributor.h" 20 21 #define DEBUG_TYPE "amdgpu-attributor" 22 23 using namespace llvm; 24 25 #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS, 26 27 enum ImplicitArgumentPositions { 28 #include "AMDGPUAttributes.def" 29 LAST_ARG_POS 30 }; 31 32 #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, 33 34 enum ImplicitArgumentMask { 35 NOT_IMPLICIT_INPUT = 0, 36 #include "AMDGPUAttributes.def" 37 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 38 }; 39 40 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, 41 static constexpr std::pair<ImplicitArgumentMask, 42 StringLiteral> ImplicitAttrs[] = { 43 #include "AMDGPUAttributes.def" 44 }; 45 46 // We do not need to note the x workitem or workgroup id because they are always 47 // initialized. 48 // 49 // TODO: We should not add the attributes if the known compile time workgroup 50 // size is 1 for y/z. 51 static ImplicitArgumentMask 52 intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) { 53 switch (ID) { 54 case Intrinsic::amdgcn_workitem_id_x: 55 NonKernelOnly = true; 56 return WORKITEM_ID_X; 57 case Intrinsic::amdgcn_workgroup_id_x: 58 NonKernelOnly = true; 59 return WORKGROUP_ID_X; 60 case Intrinsic::amdgcn_workitem_id_y: 61 case Intrinsic::r600_read_tidig_y: 62 return WORKITEM_ID_Y; 63 case Intrinsic::amdgcn_workitem_id_z: 64 case Intrinsic::r600_read_tidig_z: 65 return WORKITEM_ID_Z; 66 case Intrinsic::amdgcn_workgroup_id_y: 67 case Intrinsic::r600_read_tgid_y: 68 return WORKGROUP_ID_Y; 69 case Intrinsic::amdgcn_workgroup_id_z: 70 case Intrinsic::r600_read_tgid_z: 71 return WORKGROUP_ID_Z; 72 case Intrinsic::amdgcn_dispatch_ptr: 73 return DISPATCH_PTR; 74 case Intrinsic::amdgcn_dispatch_id: 75 return DISPATCH_ID; 76 case Intrinsic::amdgcn_implicitarg_ptr: 77 return IMPLICIT_ARG_PTR; 78 case Intrinsic::amdgcn_queue_ptr: 79 case Intrinsic::amdgcn_is_shared: 80 case Intrinsic::amdgcn_is_private: 81 // TODO: Does not require the queue pointer on gfx9+ 82 case Intrinsic::trap: 83 case Intrinsic::debugtrap: 84 IsQueuePtr = true; 85 return QUEUE_PTR; 86 default: 87 return NOT_IMPLICIT_INPUT; 88 } 89 } 90 91 static bool castRequiresQueuePtr(unsigned SrcAS) { 92 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 93 } 94 95 static bool isDSAddress(const Constant *C) { 96 const GlobalValue *GV = dyn_cast<GlobalValue>(C); 97 if (!GV) 98 return false; 99 unsigned AS = GV->getAddressSpace(); 100 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 101 } 102 103 /// Returns true if the function requires the implicit argument be passed 104 /// regardless of the function contents. 105 static bool funcRequiresImplicitArgPtr(const Function &F) { 106 // Sanitizers require the hostcall buffer passed in the implicit arguments. 107 return F.hasFnAttribute(Attribute::SanitizeAddress) || 108 F.hasFnAttribute(Attribute::SanitizeThread) || 109 F.hasFnAttribute(Attribute::SanitizeMemory) || 110 F.hasFnAttribute(Attribute::SanitizeHWAddress) || 111 F.hasFnAttribute(Attribute::SanitizeMemTag); 112 } 113 114 namespace { 115 class AMDGPUInformationCache : public InformationCache { 116 public: 117 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, 118 BumpPtrAllocator &Allocator, 119 SetVector<Function *> *CGSCC, TargetMachine &TM) 120 : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} 121 TargetMachine &TM; 122 123 enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; 124 125 /// Check if the subtarget has aperture regs. 126 bool hasApertureRegs(Function &F) { 127 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 128 return ST.hasApertureRegs(); 129 } 130 131 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) { 132 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 133 return ST.getFlatWorkGroupSizes(F); 134 } 135 136 std::pair<unsigned, unsigned> 137 getMaximumFlatWorkGroupRange(const Function &F) { 138 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 139 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; 140 } 141 142 private: 143 /// Check if the ConstantExpr \p CE requires the queue pointer. 144 static bool visitConstExpr(const ConstantExpr *CE) { 145 if (CE->getOpcode() == Instruction::AddrSpaceCast) { 146 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 147 return castRequiresQueuePtr(SrcAS); 148 } 149 return false; 150 } 151 152 /// Get the constant access bitmap for \p C. 153 uint8_t getConstantAccess(const Constant *C) { 154 auto It = ConstantStatus.find(C); 155 if (It != ConstantStatus.end()) 156 return It->second; 157 158 uint8_t Result = 0; 159 if (isDSAddress(C)) 160 Result = DS_GLOBAL; 161 162 if (const auto *CE = dyn_cast<ConstantExpr>(C)) 163 if (visitConstExpr(CE)) 164 Result |= ADDR_SPACE_CAST; 165 166 for (const Use &U : C->operands()) { 167 const auto *OpC = dyn_cast<Constant>(U); 168 if (!OpC) 169 continue; 170 171 Result |= getConstantAccess(OpC); 172 } 173 return Result; 174 } 175 176 public: 177 /// Returns true if \p Fn needs the queue pointer because of \p C. 178 bool needsQueuePtr(const Constant *C, Function &Fn) { 179 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); 180 bool HasAperture = hasApertureRegs(Fn); 181 182 // No need to explore the constants. 183 if (!IsNonEntryFunc && HasAperture) 184 return false; 185 186 uint8_t Access = getConstantAccess(C); 187 188 // We need to trap on DS globals in non-entry functions. 189 if (IsNonEntryFunc && (Access & DS_GLOBAL)) 190 return true; 191 192 return !HasAperture && (Access & ADDR_SPACE_CAST); 193 } 194 195 private: 196 /// Used to determine if the Constant needs the queue pointer. 197 DenseMap<const Constant *, uint8_t> ConstantStatus; 198 }; 199 200 struct AAAMDAttributes : public StateWrapper< 201 BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> { 202 using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, 203 AbstractAttribute>; 204 205 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 206 207 /// Create an abstract attribute view for the position \p IRP. 208 static AAAMDAttributes &createForPosition(const IRPosition &IRP, 209 Attributor &A); 210 211 /// See AbstractAttribute::getName(). 212 const std::string getName() const override { return "AAAMDAttributes"; } 213 214 /// See AbstractAttribute::getIdAddr(). 215 const char *getIdAddr() const override { return &ID; } 216 217 /// This function should return true if the type of the \p AA is 218 /// AAAMDAttributes. 219 static bool classof(const AbstractAttribute *AA) { 220 return (AA->getIdAddr() == &ID); 221 } 222 223 /// Unique ID (due to the unique address) 224 static const char ID; 225 }; 226 const char AAAMDAttributes::ID = 0; 227 228 struct AAUniformWorkGroupSize 229 : public StateWrapper<BooleanState, AbstractAttribute> { 230 using Base = StateWrapper<BooleanState, AbstractAttribute>; 231 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 232 233 /// Create an abstract attribute view for the position \p IRP. 234 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP, 235 Attributor &A); 236 237 /// See AbstractAttribute::getName(). 238 const std::string getName() const override { 239 return "AAUniformWorkGroupSize"; 240 } 241 242 /// See AbstractAttribute::getIdAddr(). 243 const char *getIdAddr() const override { return &ID; } 244 245 /// This function should return true if the type of the \p AA is 246 /// AAAMDAttributes. 247 static bool classof(const AbstractAttribute *AA) { 248 return (AA->getIdAddr() == &ID); 249 } 250 251 /// Unique ID (due to the unique address) 252 static const char ID; 253 }; 254 const char AAUniformWorkGroupSize::ID = 0; 255 256 struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { 257 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) 258 : AAUniformWorkGroupSize(IRP, A) {} 259 260 void initialize(Attributor &A) override { 261 Function *F = getAssociatedFunction(); 262 CallingConv::ID CC = F->getCallingConv(); 263 264 if (CC != CallingConv::AMDGPU_KERNEL) 265 return; 266 267 bool InitialValue = false; 268 if (F->hasFnAttribute("uniform-work-group-size")) 269 InitialValue = F->getFnAttribute("uniform-work-group-size") 270 .getValueAsString() 271 .equals("true"); 272 273 if (InitialValue) 274 indicateOptimisticFixpoint(); 275 else 276 indicatePessimisticFixpoint(); 277 } 278 279 ChangeStatus updateImpl(Attributor &A) override { 280 ChangeStatus Change = ChangeStatus::UNCHANGED; 281 282 auto CheckCallSite = [&](AbstractCallSite CS) { 283 Function *Caller = CS.getInstruction()->getFunction(); 284 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName() 285 << "->" << getAssociatedFunction()->getName() << "\n"); 286 287 const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>( 288 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 289 290 Change = Change | clampStateAndIndicateChange(this->getState(), 291 CallerInfo.getState()); 292 293 return true; 294 }; 295 296 bool AllCallSitesKnown = true; 297 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 298 return indicatePessimisticFixpoint(); 299 300 return Change; 301 } 302 303 ChangeStatus manifest(Attributor &A) override { 304 SmallVector<Attribute, 8> AttrList; 305 LLVMContext &Ctx = getAssociatedFunction()->getContext(); 306 307 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", 308 getAssumed() ? "true" : "false")); 309 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 310 /* ForceReplace */ true); 311 } 312 313 bool isValidState() const override { 314 // This state is always valid, even when the state is false. 315 return true; 316 } 317 318 const std::string getAsStr() const override { 319 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 320 } 321 322 /// See AbstractAttribute::trackStatistics() 323 void trackStatistics() const override {} 324 }; 325 326 AAUniformWorkGroupSize & 327 AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 328 Attributor &A) { 329 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 330 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 331 llvm_unreachable( 332 "AAUniformWorkGroupSize is only valid for function position"); 333 } 334 335 struct AAAMDAttributesFunction : public AAAMDAttributes { 336 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 337 : AAAMDAttributes(IRP, A) {} 338 339 void initialize(Attributor &A) override { 340 Function *F = getAssociatedFunction(); 341 342 // If the function requires the implicit arg pointer due to sanitizers, 343 // assume it's needed even if explicitly marked as not requiring it. 344 const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F); 345 if (NeedsImplicit) 346 removeAssumedBits(IMPLICIT_ARG_PTR); 347 348 for (auto Attr : ImplicitAttrs) { 349 if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR) 350 continue; 351 352 if (F->hasFnAttribute(Attr.second)) 353 addKnownBits(Attr.first); 354 } 355 356 if (F->isDeclaration()) 357 return; 358 359 // Ignore functions with graphics calling conventions, these are currently 360 // not allowed to have kernel arguments. 361 if (AMDGPU::isGraphics(F->getCallingConv())) { 362 indicatePessimisticFixpoint(); 363 return; 364 } 365 } 366 367 ChangeStatus updateImpl(Attributor &A) override { 368 Function *F = getAssociatedFunction(); 369 // The current assumed state used to determine a change. 370 auto OrigAssumed = getAssumed(); 371 372 // Check for Intrinsics and propagate attributes. 373 const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( 374 *this, this->getIRPosition(), DepClassTy::REQUIRED); 375 if (AAEdges.hasNonAsmUnknownCallee()) 376 return indicatePessimisticFixpoint(); 377 378 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 379 380 bool NeedsQueuePtr = false; 381 382 for (Function *Callee : AAEdges.getOptimisticEdges()) { 383 Intrinsic::ID IID = Callee->getIntrinsicID(); 384 if (IID == Intrinsic::not_intrinsic) { 385 const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>( 386 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 387 *this &= AAAMD; 388 continue; 389 } 390 391 bool NonKernelOnly = false; 392 ImplicitArgumentMask AttrMask = 393 intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr); 394 if (AttrMask != NOT_IMPLICIT_INPUT) { 395 if ((IsNonEntryFunc || !NonKernelOnly)) 396 removeAssumedBits(AttrMask); 397 } 398 } 399 400 if (!NeedsQueuePtr) { 401 NeedsQueuePtr = checkForQueuePtr(A); 402 } 403 404 if (NeedsQueuePtr) { 405 removeAssumedBits(QUEUE_PTR); 406 } 407 408 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED 409 : ChangeStatus::UNCHANGED; 410 } 411 412 ChangeStatus manifest(Attributor &A) override { 413 SmallVector<Attribute, 8> AttrList; 414 LLVMContext &Ctx = getAssociatedFunction()->getContext(); 415 416 for (auto Attr : ImplicitAttrs) { 417 if (isKnown(Attr.first)) 418 AttrList.push_back(Attribute::get(Ctx, Attr.second)); 419 } 420 421 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 422 /* ForceReplace */ true); 423 } 424 425 const std::string getAsStr() const override { 426 std::string Str; 427 raw_string_ostream OS(Str); 428 OS << "AMDInfo["; 429 for (auto Attr : ImplicitAttrs) 430 OS << ' ' << Attr.second; 431 OS << " ]"; 432 return OS.str(); 433 } 434 435 /// See AbstractAttribute::trackStatistics() 436 void trackStatistics() const override {} 437 438 private: 439 bool checkForQueuePtr(Attributor &A) { 440 Function *F = getAssociatedFunction(); 441 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 442 443 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 444 445 bool NeedsQueuePtr = false; 446 447 auto CheckAddrSpaceCasts = [&](Instruction &I) { 448 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); 449 if (castRequiresQueuePtr(SrcAS)) { 450 NeedsQueuePtr = true; 451 return false; 452 } 453 return true; 454 }; 455 456 bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 457 458 // `checkForAllInstructions` is much more cheaper than going through all 459 // instructions, try it first. 460 461 // The queue pointer is not needed if aperture regs is present. 462 if (!HasApertureRegs) { 463 bool UsedAssumedInformation = false; 464 A.checkForAllInstructions(CheckAddrSpaceCasts, *this, 465 {Instruction::AddrSpaceCast}, 466 UsedAssumedInformation); 467 } 468 469 // If we found that we need the queue pointer, nothing else to do. 470 if (NeedsQueuePtr) 471 return true; 472 473 if (!IsNonEntryFunc && HasApertureRegs) 474 return false; 475 476 for (BasicBlock &BB : *F) { 477 for (Instruction &I : BB) { 478 for (const Use &U : I.operands()) { 479 if (const auto *C = dyn_cast<Constant>(U)) { 480 if (InfoCache.needsQueuePtr(C, *F)) 481 return true; 482 } 483 } 484 } 485 } 486 487 return false; 488 } 489 }; 490 491 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, 492 Attributor &A) { 493 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 494 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); 495 llvm_unreachable("AAAMDAttributes is only valid for function position"); 496 } 497 498 /// Propagate amdgpu-flat-work-group-size attribute. 499 struct AAAMDFlatWorkGroupSize 500 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { 501 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; 502 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) 503 : Base(IRP, 32) {} 504 505 /// See AbstractAttribute::getState(...). 506 IntegerRangeState &getState() override { return *this; } 507 const IntegerRangeState &getState() const override { return *this; } 508 509 void initialize(Attributor &A) override { 510 Function *F = getAssociatedFunction(); 511 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 512 unsigned MinGroupSize, MaxGroupSize; 513 std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); 514 intersectKnown( 515 ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); 516 517 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 518 indicatePessimisticFixpoint(); 519 } 520 521 ChangeStatus updateImpl(Attributor &A) override { 522 ChangeStatus Change = ChangeStatus::UNCHANGED; 523 524 auto CheckCallSite = [&](AbstractCallSite CS) { 525 Function *Caller = CS.getInstruction()->getFunction(); 526 LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() 527 << "->" << getAssociatedFunction()->getName() << '\n'); 528 529 const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>( 530 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 531 532 Change |= 533 clampStateAndIndicateChange(this->getState(), CallerInfo.getState()); 534 535 return true; 536 }; 537 538 bool AllCallSitesKnown = true; 539 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 540 return indicatePessimisticFixpoint(); 541 542 return Change; 543 } 544 545 ChangeStatus manifest(Attributor &A) override { 546 SmallVector<Attribute, 8> AttrList; 547 Function *F = getAssociatedFunction(); 548 LLVMContext &Ctx = F->getContext(); 549 550 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 551 unsigned Min, Max; 552 std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); 553 554 // Don't add the attribute if it's the implied default. 555 if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) 556 return ChangeStatus::UNCHANGED; 557 558 SmallString<10> Buffer; 559 raw_svector_ostream OS(Buffer); 560 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 561 562 AttrList.push_back( 563 Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); 564 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 565 /* ForceReplace */ true); 566 } 567 568 const std::string getAsStr() const override { 569 std::string Str; 570 raw_string_ostream OS(Str); 571 OS << "AMDFlatWorkGroupSize["; 572 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 573 OS << ']'; 574 return OS.str(); 575 } 576 577 /// See AbstractAttribute::trackStatistics() 578 void trackStatistics() const override {} 579 580 /// Create an abstract attribute view for the position \p IRP. 581 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, 582 Attributor &A); 583 584 /// See AbstractAttribute::getName() 585 const std::string getName() const override { 586 return "AAAMDFlatWorkGroupSize"; 587 } 588 589 /// See AbstractAttribute::getIdAddr() 590 const char *getIdAddr() const override { return &ID; } 591 592 /// This function should return true if the type of the \p AA is 593 /// AAAMDFlatWorkGroupSize 594 static bool classof(const AbstractAttribute *AA) { 595 return (AA->getIdAddr() == &ID); 596 } 597 598 /// Unique ID (due to the unique address) 599 static const char ID; 600 }; 601 602 const char AAAMDFlatWorkGroupSize::ID = 0; 603 604 AAAMDFlatWorkGroupSize & 605 AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, 606 Attributor &A) { 607 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 608 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); 609 llvm_unreachable( 610 "AAAMDFlatWorkGroupSize is only valid for function position"); 611 } 612 613 class AMDGPUAttributor : public ModulePass { 614 public: 615 AMDGPUAttributor() : ModulePass(ID) {} 616 617 /// doInitialization - Virtual method overridden by subclasses to do 618 /// any necessary initialization before any pass is run. 619 bool doInitialization(Module &) override { 620 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 621 if (!TPC) 622 report_fatal_error("TargetMachine is required"); 623 624 TM = &TPC->getTM<TargetMachine>(); 625 return false; 626 } 627 628 bool runOnModule(Module &M) override { 629 SetVector<Function *> Functions; 630 AnalysisGetter AG; 631 for (Function &F : M) { 632 if (!F.isIntrinsic()) 633 Functions.insert(&F); 634 } 635 636 CallGraphUpdater CGUpdater; 637 BumpPtrAllocator Allocator; 638 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); 639 DenseSet<const char *> Allowed( 640 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, 641 &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID}); 642 643 Attributor A(Functions, InfoCache, CGUpdater, &Allowed); 644 645 for (Function &F : M) { 646 if (!F.isIntrinsic()) { 647 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); 648 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); 649 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { 650 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); 651 } 652 } 653 } 654 655 ChangeStatus Change = A.run(); 656 return Change == ChangeStatus::CHANGED; 657 } 658 659 StringRef getPassName() const override { return "AMDGPU Attributor"; } 660 TargetMachine *TM; 661 static char ID; 662 }; 663 } // namespace 664 665 char AMDGPUAttributor::ID = 0; 666 667 Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } 668 INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false) 669