//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile-time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
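
  // The status values form a bitmask: getConstantAccess() below ORs a
  // constant's own status with that of every constant it transitively
  // references, so e.g. an LDS global reached through an addrspace cast
  // reports DS_GLOBAL | ADDR_SPACE_CAST.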

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
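
// Propagates "uniform-work-group-size" down the call graph. Kernels seed the
// fixpoint from their existing attribute, and every other function clamps its
// state against the states of all of its callers, so a single caller with
// "uniform-work-group-size"="false" forces the callee to "false" as well.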
struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}
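
// Deduces the implicit-argument attributes a function needs. Attributes flow
// bottom-up through the call graph: a function unions in its callees'
// attributes, seeds attributes from intrinsic uses, and pessimistically
// assumes all of them when the callee set is unknown (e.g. for address-taken
// functions or unresolved indirect calls).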
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from the called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;
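
    // As a last resort, walk all constant operands in the function: constant
    // expressions (e.g. addrspace casts of LDS globals) are not visited by
    // the instruction scans above, but can still require the queue ptr.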
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)