1 //===- AMDGPUAttributor.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This pass uses Attributor framework to deduce AMDGPU attributes. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "GCNSubtarget.h" 15 #include "Utils/AMDGPUBaseInfo.h" 16 #include "llvm/CodeGen/TargetPassConfig.h" 17 #include "llvm/IR/IntrinsicsAMDGPU.h" 18 #include "llvm/IR/IntrinsicsR600.h" 19 #include "llvm/Target/TargetMachine.h" 20 #include "llvm/Transforms/IPO/Attributor.h" 21 22 #define DEBUG_TYPE "amdgpu-attributor" 23 24 using namespace llvm; 25 26 #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS, 27 28 enum ImplicitArgumentPositions { 29 #include "AMDGPUAttributes.def" 30 LAST_ARG_POS 31 }; 32 33 #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, 34 35 enum ImplicitArgumentMask { 36 NOT_IMPLICIT_INPUT = 0, 37 #include "AMDGPUAttributes.def" 38 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 39 }; 40 41 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, 42 static constexpr std::pair<ImplicitArgumentMask, 43 StringLiteral> ImplicitAttrs[] = { 44 #include "AMDGPUAttributes.def" 45 }; 46 47 // We do not need to note the x workitem or workgroup id because they are always 48 // initialized. 49 // 50 // TODO: We should not add the attributes if the known compile time workgroup 51 // size is 1 for y/z. 52 static ImplicitArgumentMask 53 intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) { 54 switch (ID) { 55 case Intrinsic::amdgcn_workitem_id_x: 56 NonKernelOnly = true; 57 return WORKITEM_ID_X; 58 case Intrinsic::amdgcn_workgroup_id_x: 59 NonKernelOnly = true; 60 return WORKGROUP_ID_X; 61 case Intrinsic::amdgcn_workitem_id_y: 62 case Intrinsic::r600_read_tidig_y: 63 return WORKITEM_ID_Y; 64 case Intrinsic::amdgcn_workitem_id_z: 65 case Intrinsic::r600_read_tidig_z: 66 return WORKITEM_ID_Z; 67 case Intrinsic::amdgcn_workgroup_id_y: 68 case Intrinsic::r600_read_tgid_y: 69 return WORKGROUP_ID_Y; 70 case Intrinsic::amdgcn_workgroup_id_z: 71 case Intrinsic::r600_read_tgid_z: 72 return WORKGROUP_ID_Z; 73 case Intrinsic::amdgcn_dispatch_ptr: 74 return DISPATCH_PTR; 75 case Intrinsic::amdgcn_dispatch_id: 76 return DISPATCH_ID; 77 case Intrinsic::amdgcn_implicitarg_ptr: 78 return IMPLICIT_ARG_PTR; 79 case Intrinsic::amdgcn_queue_ptr: 80 case Intrinsic::amdgcn_is_shared: 81 case Intrinsic::amdgcn_is_private: 82 // TODO: Does not require the queue pointer on gfx9+ 83 case Intrinsic::trap: 84 case Intrinsic::debugtrap: 85 IsQueuePtr = true; 86 return QUEUE_PTR; 87 default: 88 return NOT_IMPLICIT_INPUT; 89 } 90 } 91 92 static bool castRequiresQueuePtr(unsigned SrcAS) { 93 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 94 } 95 96 static bool isDSAddress(const Constant *C) { 97 const GlobalValue *GV = dyn_cast<GlobalValue>(C); 98 if (!GV) 99 return false; 100 unsigned AS = GV->getAddressSpace(); 101 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 102 } 103 104 /// Returns true if the function requires the implicit argument be passed 105 /// regardless of the function contents. 106 static bool funcRequiresHostcallPtr(const Function &F) { 107 // Sanitizers require the hostcall buffer passed in the implicit arguments. 108 return F.hasFnAttribute(Attribute::SanitizeAddress) || 109 F.hasFnAttribute(Attribute::SanitizeThread) || 110 F.hasFnAttribute(Attribute::SanitizeMemory) || 111 F.hasFnAttribute(Attribute::SanitizeHWAddress) || 112 F.hasFnAttribute(Attribute::SanitizeMemTag); 113 } 114 115 namespace { 116 class AMDGPUInformationCache : public InformationCache { 117 public: 118 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, 119 BumpPtrAllocator &Allocator, 120 SetVector<Function *> *CGSCC, TargetMachine &TM) 121 : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} 122 TargetMachine &TM; 123 124 enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; 125 126 /// Check if the subtarget has aperture regs. 127 bool hasApertureRegs(Function &F) { 128 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 129 return ST.hasApertureRegs(); 130 } 131 132 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) { 133 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 134 return ST.getFlatWorkGroupSizes(F); 135 } 136 137 std::pair<unsigned, unsigned> 138 getMaximumFlatWorkGroupRange(const Function &F) { 139 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 140 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; 141 } 142 143 private: 144 /// Check if the ConstantExpr \p CE requires the queue pointer. 145 static bool visitConstExpr(const ConstantExpr *CE) { 146 if (CE->getOpcode() == Instruction::AddrSpaceCast) { 147 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 148 return castRequiresQueuePtr(SrcAS); 149 } 150 return false; 151 } 152 153 /// Get the constant access bitmap for \p C. 154 uint8_t getConstantAccess(const Constant *C) { 155 auto It = ConstantStatus.find(C); 156 if (It != ConstantStatus.end()) 157 return It->second; 158 159 uint8_t Result = 0; 160 if (isDSAddress(C)) 161 Result = DS_GLOBAL; 162 163 if (const auto *CE = dyn_cast<ConstantExpr>(C)) 164 if (visitConstExpr(CE)) 165 Result |= ADDR_SPACE_CAST; 166 167 for (const Use &U : C->operands()) { 168 const auto *OpC = dyn_cast<Constant>(U); 169 if (!OpC) 170 continue; 171 172 Result |= getConstantAccess(OpC); 173 } 174 return Result; 175 } 176 177 public: 178 /// Returns true if \p Fn needs the queue pointer because of \p C. 179 bool needsQueuePtr(const Constant *C, Function &Fn) { 180 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); 181 bool HasAperture = hasApertureRegs(Fn); 182 183 // No need to explore the constants. 184 if (!IsNonEntryFunc && HasAperture) 185 return false; 186 187 uint8_t Access = getConstantAccess(C); 188 189 // We need to trap on DS globals in non-entry functions. 190 if (IsNonEntryFunc && (Access & DS_GLOBAL)) 191 return true; 192 193 return !HasAperture && (Access & ADDR_SPACE_CAST); 194 } 195 196 private: 197 /// Used to determine if the Constant needs the queue pointer. 198 DenseMap<const Constant *, uint8_t> ConstantStatus; 199 }; 200 201 struct AAAMDAttributes : public StateWrapper< 202 BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> { 203 using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, 204 AbstractAttribute>; 205 206 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 207 208 /// Create an abstract attribute view for the position \p IRP. 209 static AAAMDAttributes &createForPosition(const IRPosition &IRP, 210 Attributor &A); 211 212 /// See AbstractAttribute::getName(). 213 const std::string getName() const override { return "AAAMDAttributes"; } 214 215 /// See AbstractAttribute::getIdAddr(). 216 const char *getIdAddr() const override { return &ID; } 217 218 /// This function should return true if the type of the \p AA is 219 /// AAAMDAttributes. 220 static bool classof(const AbstractAttribute *AA) { 221 return (AA->getIdAddr() == &ID); 222 } 223 224 /// Unique ID (due to the unique address) 225 static const char ID; 226 }; 227 const char AAAMDAttributes::ID = 0; 228 229 struct AAUniformWorkGroupSize 230 : public StateWrapper<BooleanState, AbstractAttribute> { 231 using Base = StateWrapper<BooleanState, AbstractAttribute>; 232 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 233 234 /// Create an abstract attribute view for the position \p IRP. 235 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP, 236 Attributor &A); 237 238 /// See AbstractAttribute::getName(). 239 const std::string getName() const override { 240 return "AAUniformWorkGroupSize"; 241 } 242 243 /// See AbstractAttribute::getIdAddr(). 244 const char *getIdAddr() const override { return &ID; } 245 246 /// This function should return true if the type of the \p AA is 247 /// AAAMDAttributes. 248 static bool classof(const AbstractAttribute *AA) { 249 return (AA->getIdAddr() == &ID); 250 } 251 252 /// Unique ID (due to the unique address) 253 static const char ID; 254 }; 255 const char AAUniformWorkGroupSize::ID = 0; 256 257 struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { 258 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) 259 : AAUniformWorkGroupSize(IRP, A) {} 260 261 void initialize(Attributor &A) override { 262 Function *F = getAssociatedFunction(); 263 CallingConv::ID CC = F->getCallingConv(); 264 265 if (CC != CallingConv::AMDGPU_KERNEL) 266 return; 267 268 bool InitialValue = false; 269 if (F->hasFnAttribute("uniform-work-group-size")) 270 InitialValue = F->getFnAttribute("uniform-work-group-size") 271 .getValueAsString() 272 .equals("true"); 273 274 if (InitialValue) 275 indicateOptimisticFixpoint(); 276 else 277 indicatePessimisticFixpoint(); 278 } 279 280 ChangeStatus updateImpl(Attributor &A) override { 281 ChangeStatus Change = ChangeStatus::UNCHANGED; 282 283 auto CheckCallSite = [&](AbstractCallSite CS) { 284 Function *Caller = CS.getInstruction()->getFunction(); 285 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName() 286 << "->" << getAssociatedFunction()->getName() << "\n"); 287 288 const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>( 289 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 290 291 Change = Change | clampStateAndIndicateChange(this->getState(), 292 CallerInfo.getState()); 293 294 return true; 295 }; 296 297 bool AllCallSitesKnown = true; 298 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 299 return indicatePessimisticFixpoint(); 300 301 return Change; 302 } 303 304 ChangeStatus manifest(Attributor &A) override { 305 SmallVector<Attribute, 8> AttrList; 306 LLVMContext &Ctx = getAssociatedFunction()->getContext(); 307 308 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", 309 getAssumed() ? "true" : "false")); 310 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 311 /* ForceReplace */ true); 312 } 313 314 bool isValidState() const override { 315 // This state is always valid, even when the state is false. 316 return true; 317 } 318 319 const std::string getAsStr() const override { 320 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 321 } 322 323 /// See AbstractAttribute::trackStatistics() 324 void trackStatistics() const override {} 325 }; 326 327 AAUniformWorkGroupSize & 328 AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 329 Attributor &A) { 330 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 331 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 332 llvm_unreachable( 333 "AAUniformWorkGroupSize is only valid for function position"); 334 } 335 336 struct AAAMDAttributesFunction : public AAAMDAttributes { 337 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 338 : AAAMDAttributes(IRP, A) {} 339 340 void initialize(Attributor &A) override { 341 Function *F = getAssociatedFunction(); 342 343 // If the function requires the implicit arg pointer due to sanitizers, 344 // assume it's needed even if explicitly marked as not requiring it. 345 const bool NeedsHostcall = funcRequiresHostcallPtr(*F); 346 if (NeedsHostcall) { 347 removeAssumedBits(IMPLICIT_ARG_PTR); 348 removeAssumedBits(HOSTCALL_PTR); 349 } 350 351 for (auto Attr : ImplicitAttrs) { 352 if (NeedsHostcall && 353 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) 354 continue; 355 356 if (F->hasFnAttribute(Attr.second)) 357 addKnownBits(Attr.first); 358 } 359 360 if (F->isDeclaration()) 361 return; 362 363 // Ignore functions with graphics calling conventions, these are currently 364 // not allowed to have kernel arguments. 365 if (AMDGPU::isGraphics(F->getCallingConv())) { 366 indicatePessimisticFixpoint(); 367 return; 368 } 369 } 370 371 ChangeStatus updateImpl(Attributor &A) override { 372 Function *F = getAssociatedFunction(); 373 // The current assumed state used to determine a change. 374 auto OrigAssumed = getAssumed(); 375 376 // Check for Intrinsics and propagate attributes. 377 const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( 378 *this, this->getIRPosition(), DepClassTy::REQUIRED); 379 if (AAEdges.hasNonAsmUnknownCallee()) 380 return indicatePessimisticFixpoint(); 381 382 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 383 384 bool NeedsQueuePtr = false; 385 386 for (Function *Callee : AAEdges.getOptimisticEdges()) { 387 Intrinsic::ID IID = Callee->getIntrinsicID(); 388 if (IID == Intrinsic::not_intrinsic) { 389 const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>( 390 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 391 *this &= AAAMD; 392 continue; 393 } 394 395 bool NonKernelOnly = false; 396 ImplicitArgumentMask AttrMask = 397 intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr); 398 if (AttrMask != NOT_IMPLICIT_INPUT) { 399 if ((IsNonEntryFunc || !NonKernelOnly)) 400 removeAssumedBits(AttrMask); 401 } 402 } 403 404 if (!NeedsQueuePtr) { 405 NeedsQueuePtr = checkForQueuePtr(A); 406 } 407 408 if (NeedsQueuePtr) { 409 removeAssumedBits(QUEUE_PTR); 410 } 411 412 if (funcRetrievesHostcallPtr(A)) { 413 removeAssumedBits(IMPLICIT_ARG_PTR); 414 removeAssumedBits(HOSTCALL_PTR); 415 } 416 417 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED 418 : ChangeStatus::UNCHANGED; 419 } 420 421 ChangeStatus manifest(Attributor &A) override { 422 SmallVector<Attribute, 8> AttrList; 423 LLVMContext &Ctx = getAssociatedFunction()->getContext(); 424 425 for (auto Attr : ImplicitAttrs) { 426 if (isKnown(Attr.first)) 427 AttrList.push_back(Attribute::get(Ctx, Attr.second)); 428 } 429 430 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 431 /* ForceReplace */ true); 432 } 433 434 const std::string getAsStr() const override { 435 std::string Str; 436 raw_string_ostream OS(Str); 437 OS << "AMDInfo["; 438 for (auto Attr : ImplicitAttrs) 439 OS << ' ' << Attr.second; 440 OS << " ]"; 441 return OS.str(); 442 } 443 444 /// See AbstractAttribute::trackStatistics() 445 void trackStatistics() const override {} 446 447 private: 448 bool checkForQueuePtr(Attributor &A) { 449 Function *F = getAssociatedFunction(); 450 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 451 452 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 453 454 bool NeedsQueuePtr = false; 455 456 auto CheckAddrSpaceCasts = [&](Instruction &I) { 457 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); 458 if (castRequiresQueuePtr(SrcAS)) { 459 NeedsQueuePtr = true; 460 return false; 461 } 462 return true; 463 }; 464 465 bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 466 467 // `checkForAllInstructions` is much more cheaper than going through all 468 // instructions, try it first. 469 470 // The queue pointer is not needed if aperture regs is present. 471 if (!HasApertureRegs) { 472 bool UsedAssumedInformation = false; 473 A.checkForAllInstructions(CheckAddrSpaceCasts, *this, 474 {Instruction::AddrSpaceCast}, 475 UsedAssumedInformation); 476 } 477 478 // If we found that we need the queue pointer, nothing else to do. 479 if (NeedsQueuePtr) 480 return true; 481 482 if (!IsNonEntryFunc && HasApertureRegs) 483 return false; 484 485 for (BasicBlock &BB : *F) { 486 for (Instruction &I : BB) { 487 for (const Use &U : I.operands()) { 488 if (const auto *C = dyn_cast<Constant>(U)) { 489 if (InfoCache.needsQueuePtr(C, *F)) 490 return true; 491 } 492 } 493 } 494 } 495 496 return false; 497 } 498 499 bool funcRetrievesHostcallPtr(Attributor &A) { 500 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); 501 502 // Check if this is a call to the implicitarg_ptr builtin and it 503 // is used to retrieve the hostcall pointer. The implicit arg for 504 // hostcall is not used only if every use of the implicitarg_ptr 505 // is a load that clearly does not retrieve any byte of the 506 // hostcall pointer. We check this by tracing all the uses of the 507 // initial call to the implicitarg_ptr intrinsic. 508 auto DoesNotLeadToHostcallPtr = [&](Instruction &I) { 509 auto &Call = cast<CallBase>(I); 510 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr) 511 return true; 512 513 const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>( 514 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); 515 516 AAPointerInfo::OffsetAndSize OAS(Pos, 8); 517 return PointerInfoAA.forallInterferingAccesses( 518 OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) { 519 return Acc.getRemoteInst()->isDroppable(); 520 }); 521 }; 522 523 bool UsedAssumedInformation = false; 524 return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this, 525 UsedAssumedInformation); 526 } 527 }; 528 529 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, 530 Attributor &A) { 531 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 532 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); 533 llvm_unreachable("AAAMDAttributes is only valid for function position"); 534 } 535 536 /// Propagate amdgpu-flat-work-group-size attribute. 537 struct AAAMDFlatWorkGroupSize 538 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { 539 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; 540 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) 541 : Base(IRP, 32) {} 542 543 /// See AbstractAttribute::getState(...). 544 IntegerRangeState &getState() override { return *this; } 545 const IntegerRangeState &getState() const override { return *this; } 546 547 void initialize(Attributor &A) override { 548 Function *F = getAssociatedFunction(); 549 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 550 unsigned MinGroupSize, MaxGroupSize; 551 std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); 552 intersectKnown( 553 ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); 554 555 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 556 indicatePessimisticFixpoint(); 557 } 558 559 ChangeStatus updateImpl(Attributor &A) override { 560 ChangeStatus Change = ChangeStatus::UNCHANGED; 561 562 auto CheckCallSite = [&](AbstractCallSite CS) { 563 Function *Caller = CS.getInstruction()->getFunction(); 564 LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() 565 << "->" << getAssociatedFunction()->getName() << '\n'); 566 567 const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>( 568 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 569 570 Change |= 571 clampStateAndIndicateChange(this->getState(), CallerInfo.getState()); 572 573 return true; 574 }; 575 576 bool AllCallSitesKnown = true; 577 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 578 return indicatePessimisticFixpoint(); 579 580 return Change; 581 } 582 583 ChangeStatus manifest(Attributor &A) override { 584 SmallVector<Attribute, 8> AttrList; 585 Function *F = getAssociatedFunction(); 586 LLVMContext &Ctx = F->getContext(); 587 588 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 589 unsigned Min, Max; 590 std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); 591 592 // Don't add the attribute if it's the implied default. 593 if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) 594 return ChangeStatus::UNCHANGED; 595 596 SmallString<10> Buffer; 597 raw_svector_ostream OS(Buffer); 598 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 599 600 AttrList.push_back( 601 Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); 602 return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 603 /* ForceReplace */ true); 604 } 605 606 const std::string getAsStr() const override { 607 std::string Str; 608 raw_string_ostream OS(Str); 609 OS << "AMDFlatWorkGroupSize["; 610 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 611 OS << ']'; 612 return OS.str(); 613 } 614 615 /// See AbstractAttribute::trackStatistics() 616 void trackStatistics() const override {} 617 618 /// Create an abstract attribute view for the position \p IRP. 619 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, 620 Attributor &A); 621 622 /// See AbstractAttribute::getName() 623 const std::string getName() const override { 624 return "AAAMDFlatWorkGroupSize"; 625 } 626 627 /// See AbstractAttribute::getIdAddr() 628 const char *getIdAddr() const override { return &ID; } 629 630 /// This function should return true if the type of the \p AA is 631 /// AAAMDFlatWorkGroupSize 632 static bool classof(const AbstractAttribute *AA) { 633 return (AA->getIdAddr() == &ID); 634 } 635 636 /// Unique ID (due to the unique address) 637 static const char ID; 638 }; 639 640 const char AAAMDFlatWorkGroupSize::ID = 0; 641 642 AAAMDFlatWorkGroupSize & 643 AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, 644 Attributor &A) { 645 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 646 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); 647 llvm_unreachable( 648 "AAAMDFlatWorkGroupSize is only valid for function position"); 649 } 650 651 class AMDGPUAttributor : public ModulePass { 652 public: 653 AMDGPUAttributor() : ModulePass(ID) {} 654 655 /// doInitialization - Virtual method overridden by subclasses to do 656 /// any necessary initialization before any pass is run. 657 bool doInitialization(Module &) override { 658 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 659 if (!TPC) 660 report_fatal_error("TargetMachine is required"); 661 662 TM = &TPC->getTM<TargetMachine>(); 663 return false; 664 } 665 666 bool runOnModule(Module &M) override { 667 SetVector<Function *> Functions; 668 AnalysisGetter AG; 669 for (Function &F : M) { 670 if (!F.isIntrinsic()) 671 Functions.insert(&F); 672 } 673 674 CallGraphUpdater CGUpdater; 675 BumpPtrAllocator Allocator; 676 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); 677 DenseSet<const char *> Allowed( 678 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, 679 &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID}); 680 681 Attributor A(Functions, InfoCache, CGUpdater, &Allowed); 682 683 for (Function &F : M) { 684 if (!F.isIntrinsic()) { 685 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); 686 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); 687 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { 688 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); 689 } 690 } 691 } 692 693 ChangeStatus Change = A.run(); 694 return Change == ChangeStatus::CHANGED; 695 } 696 697 StringRef getPassName() const override { return "AMDGPU Attributor"; } 698 TargetMachine *TM; 699 static char ID; 700 }; 701 } // namespace 702 703 char AMDGPUAttributor::ID = 0; 704 705 Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } 706 INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false) 707