//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

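/// InformationCache specialized for the AMDGPU backend. It gives the abstract
/// attributes access to the TargetMachine (and through it the subtarget, e.g.
/// to check for aperture registers) and classifies constants by whether they
/// refer to an LDS/region global or contain an addrspacecast that would
/// require the queue pointer.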
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

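/// Abstract attribute tracking which AMDGPU implicit-argument function
/// attributes (work-item/work-group ids, dispatch, queue, kernarg segment and
/// implicitarg pointers) a function needs. The deduced set is exposed through
/// getAttributes() and manifested as string attributes on the IR function.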
struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}

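/// Function-position implementation of AAAMDAttributes. initialize() seeds the
/// set from attributes already present on the function (and conservatively
/// adds all implicit attributes for address-taken functions), while
/// updateImpl() walks the optimistic call edges to pick up attributes implied
/// by intrinsic calls and by callees, and scans the body for allocas,
/// addrspacecasts, and constants that force "amdgpu-stack-objects" or
/// "amdgpu-queue-ptr".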
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from the called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking every instruction
    // of the function ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture registers are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

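/// Legacy module pass that drives the Attributor. It seeds AAAMDAttributes and
/// AAAMDWorkGroupSize on every non-intrinsic function in the module and lets
/// the fixpoint iteration deduce and manifest the attributes defined above.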
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
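
// Usage sketch (an assumption about the surrounding pipeline, not part of this
// file): the backend creates the pass with createAMDGPUAttributorPass() and
// adds it to its IR pass pipeline, e.g. addPass(createAMDGPUAttributorPass()).
// Because doInitialization() above requires a TargetPassConfig, the pass can
// only run in a pipeline that has an AMDGPU TargetMachine attached.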