//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,

  // SGPRs
  DISPATCH_PTR = 1 << 0,
  QUEUE_PTR = 1 << 1,
  DISPATCH_ID = 1 << 2,
  IMPLICIT_ARG_PTR = 1 << 3,
  WORKGROUP_ID_X = 1 << 4,
  WORKGROUP_ID_Y = 1 << 5,
  WORKGROUP_ID_Z = 1 << 6,

  // VGPRs
  WORKITEM_ID_X = 1 << 7,
  WORKITEM_ID_Y = 1 << 8,
  WORKITEM_ID_Z = 1 << 9,
  ALL_ARGUMENT_MASK = (1 << 10) - 1
};

static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &
AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    for (auto Attr : ImplicitAttrs) {
      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      removeAssumedBits(QUEUE_PTR);
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    if (!IsNonEntryFunc && HasApertureRegs) {
      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                         : ChangeStatus::UNCHANGED;
    }

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              removeAssumedBits(QUEUE_PTR);
              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                                 : ChangeStatus::UNCHANGED;
            }
          }
        }
      }
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAAMDWorkGroupSize::ID, &AACallEdges::ID});

    Attributor A(Functions, InfoCache, CGUpdater, &Allowed);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
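
// Illustrative usage sketch (added for exposition; the placement below is an
// assumption, not taken from this file): the legacy pass created by
// createAMDGPUAttributorPass() is meant to be scheduled from the AMDGPU
// target's IR pass pipeline, roughly along these lines:
//
//   void GCNPassConfig::addIRPasses() {   // hypothetical placement
//     addPass(createAMDGPUAttributorPass());
//     // ... remaining IR passes ...
//   }
//
// After the Attributor run, functions whose implicit inputs were proven unused
// carry the corresponding "amdgpu-no-*" attributes, and functions are annotated
// with "uniform-work-group-size" as propagated from their callers (see the
// manifest() implementations above).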