1 //===-- AMDGPUReplaceLDSUseWithPointer.cpp --------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass replaces all the uses of LDS within non-kernel functions by 10 // corresponding pointer counter-parts. 11 // 12 // The main motivation behind this pass is - to *avoid* subsequent LDS lowering 13 // pass from directly packing LDS (assume large LDS) into a struct type which 14 // would otherwise cause allocating huge memory for struct instance within every 15 // kernel. 16 // 17 // Brief sketch of the algorithm implemented in this pass is as below: 18 // 19 // 1. Collect all the LDS defined in the module which qualify for pointer 20 // replacement, say it is, LDSGlobals set. 21 // 22 // 2. Collect all the reachable callees for each kernel defined in the module, 23 // say it is, KernelToCallees map. 24 // 25 // 3. FOR (each global GV from LDSGlobals set) DO 26 // LDSUsedNonKernels = Collect all non-kernel functions which use GV. 27 // FOR (each kernel K in KernelToCallees map) DO 28 // ReachableCallees = KernelToCallees[K] 29 // ReachableAndLDSUsedCallees = 30 // SetIntersect(LDSUsedNonKernels, ReachableCallees) 31 // IF (ReachableAndLDSUsedCallees is not empty) THEN 32 // Pointer = Create a pointer to point-to GV if not created. 33 // Initialize Pointer to point-to GV within kernel K. 34 // ENDIF 35 // ENDFOR 36 // Replace all uses of GV within non kernel functions by Pointer. 
37 // ENDFOR 38 // 39 // LLVM IR example: 40 // 41 // Input IR: 42 // 43 // @lds = internal addrspace(3) global [4 x i32] undef, align 16 44 // 45 // define internal void @f0() { 46 // entry: 47 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds, 48 // i32 0, i32 0 49 // ret void 50 // } 51 // 52 // define protected amdgpu_kernel void @k0() { 53 // entry: 54 // call void @f0() 55 // ret void 56 // } 57 // 58 // Output IR: 59 // 60 // @lds = internal addrspace(3) global [4 x i32] undef, align 16 61 // @lds.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 62 // 63 // define internal void @f0() { 64 // entry: 65 // %0 = load i16, i16 addrspace(3)* @lds.ptr, align 2 66 // %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0 67 // %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)* 68 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, 69 // i32 0, i32 0 70 // ret void 71 // } 72 // 73 // define protected amdgpu_kernel void @k0() { 74 // entry: 75 // store i16 ptrtoint ([4 x i32] addrspace(3)* @lds to i16), 76 // i16 addrspace(3)* @lds.ptr, align 2 77 // call void @f0() 78 // ret void 79 // } 80 // 81 //===----------------------------------------------------------------------===// 82 83 #include "AMDGPU.h" 84 #include "GCNSubtarget.h" 85 #include "Utils/AMDGPUBaseInfo.h" 86 #include "Utils/AMDGPUMemoryUtils.h" 87 #include "llvm/ADT/DenseMap.h" 88 #include "llvm/ADT/STLExtras.h" 89 #include "llvm/ADT/SetOperations.h" 90 #include "llvm/Analysis/CallGraph.h" 91 #include "llvm/CodeGen/TargetPassConfig.h" 92 #include "llvm/IR/Constants.h" 93 #include "llvm/IR/DerivedTypes.h" 94 #include "llvm/IR/IRBuilder.h" 95 #include "llvm/IR/InlineAsm.h" 96 #include "llvm/IR/Instructions.h" 97 #include "llvm/IR/IntrinsicsAMDGPU.h" 98 #include "llvm/IR/ReplaceConstant.h" 99 #include "llvm/InitializePasses.h" 100 #include "llvm/Pass.h" 101 #include "llvm/Support/Debug.h" 102 #include "llvm/Target/TargetMachine.h" 103 
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <vector>

#define DEBUG_TYPE "amdgpu-replace-lds-use-with-pointer"

using namespace llvm;

namespace {

namespace AMDGPU {
/// Collect all the instructions to which user \p U belongs. \p U could be an
/// instruction itself or it could be a constant expression which is used within
/// an instruction. If \p CollectKernelInsts is true, collect instructions only
/// from kernels, otherwise collect instructions only from non-kernel functions.
DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
getFunctionToInstsMap(User *U, bool CollectKernelInsts);

/// Collect the non-kernel functions within which \p GV is used (directly or
/// through constant users).
SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);

} // namespace AMDGPU

// Implementation of the pass: for each qualifying LDS global, creates an i16
// "pointer" global, initializes it in the kernels that can reach a non-kernel
// user of the LDS global, and rewrites the non-kernel uses to go through that
// pointer. All bookkeeping maps below live for one run over module M.
class ReplaceLDSUseImpl {
  Module &M;
  LLVMContext &Ctx;
  const DataLayout &DL;
  // Null i8 pointer in the LOCAL address space (built in the constructor);
  // used as the base of the GEP which rebuilds an LDS address from the offset
  // loaded from the LDS pointer.
  Constant *LDSMemBaseAddr;

  // LDS global -> the ".ptr" global created for it by createLDSPointer().
  DenseMap<GlobalVariable *, GlobalVariable *> LDSToPointer;
  // LDS global -> non-kernel functions which use it; filled as a side effect
  // of shouldIgnorePointerReplacement().
  DenseMap<GlobalVariable *, SmallPtrSet<Function *, 8>> LDSToNonKernels;
  // Kernel -> functions reachable from it.
  DenseMap<Function *, SmallPtrSet<Function *, 8>> KernelToCallees;
  // Kernel -> LDS pointers which are already initialized within that kernel.
  DenseMap<Function *, SmallPtrSet<GlobalVariable *, 8>> KernelToLDSPointers;
  // Kernel -> basic block created by activateLaneZero() which holds the LDS
  // pointer initialization stores.
  DenseMap<Function *, BasicBlock *> KernelToInitBB;
  // Non-kernel function -> LDS global -> value which replaces that LDS global
  // within the function (see getReplacementInst()).
  DenseMap<Function *, DenseMap<GlobalVariable *, Value *>>
      FunctionToLDSToReplaceInst;

  // Collect the LDS globals whose uses need to be replaced by a pointer.
  std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
    // Collect LDS which requires module lowering.
    std::vector<GlobalVariable *> LDSGlobals =
        llvm::AMDGPU::findVariablesToLower(M);

    // Remove LDS which don't qualify for replacement. Note that the predicate
    // also records the non-kernel accessors of each LDS it inspects.
    llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
      return shouldIgnorePointerReplacement(GV);
    });

    return LDSGlobals;
  }

  // Returns true if uses of the given LDS global within non-kernel functions
  // should be kept as they are, without pointer replacement.
  //
  // Side effect: for an LDS global which passes the size check,
  // LDSToNonKernels[GV] is (re)computed here and later read by
  // replaceLDSUse(GV).
  bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
    // LDS whose size is very small and doesn't exceed pointer size is not worth
    // replacing. The threshold of 2 bytes matches the i16 type used for the
    // LDS pointer in createLDSPointer().
    if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
      return true;

    // LDS which is not used from non-kernel function scope or it is used from
    // global scope does not qualify for replacement.
    LDSToNonKernels[GV] = AMDGPU::collectNonKernelAccessorsOfLDS(GV);
    return LDSToNonKernels[GV].empty();

    // FIXME: When GV is used within all (or within most of the kernels), then
    // it does not make sense to create a pointer for it.
  }

  // Create (once per LDS global) the new global LDS pointer which points to
  // the given LDS global, and return it.
  GlobalVariable *createLDSPointer(GlobalVariable *GV) {
    // LDS pointer which points to LDS is already created? Return it.
    // The insert() doubles as the lookup: it only succeeds on the first call
    // for GV, leaving a nullptr placeholder which is filled in below.
    auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
    if (!PointerEntry.second)
      return PointerEntry.first->second;

    // We need to create new LDS pointer which points to LDS.
    //
    // Each CU owns at max 64K of LDS memory, so LDS address ranges from 0 to
    // 2^16 - 1. Hence 16 bit pointer is enough to hold the LDS address.
    auto *I16Ty = Type::getInt16Ty(Ctx);
    // Internal, non-constant, undef-initialized i16 global named "<GV>.ptr",
    // itself placed in the LOCAL address space.
    GlobalVariable *LDSPointer = new GlobalVariable(
        M, I16Ty, false, GlobalValue::InternalLinkage, UndefValue::get(I16Ty),
        GV->getName() + Twine(".ptr"), nullptr, GlobalVariable::NotThreadLocal,
        AMDGPUAS::LOCAL_ADDRESS);

    LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));

    // Mark that an associated LDS pointer is created for LDS.
    LDSToPointer[GV] = LDSPointer;

    return LDSPointer;
  }

  // Split entry basic block in such a way that only lane 0 of each wave does
  // the LDS pointer initialization, and return newly created basic block.
  // The split is done at most once per kernel; later calls return the block
  // created by the first call.
  BasicBlock *activateLaneZero(Function *K) {
    // If the entry basic block of kernel K is already split, then return
    // newly created basic block.
    auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
    if (!BasicBlockEntry.second)
      return BasicBlockEntry.first->second;

    // Split entry basic block of kernel K.
    auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt()));
    IRBuilder<> Builder(EI);

    // NOTE(review): mbcnt.lo with an all-ones mask and a zero base presumably
    // yields the lane's position among the low lanes, so the compare below is
    // true only for lane 0 - confirm against the AMDGPU intrinsic docs.
    Value *Mbcnt =
        Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
                                {Builder.getInt32(-1), Builder.getInt32(0)});
    Value *Cond = Builder.CreateICmpEQ(Mbcnt, Builder.getInt32(0));
    // The wave barrier is the split point: the conditional "then" block is
    // inserted before it, so the barrier is the first instruction executed
    // after the (conditionally run) initialization block.
    Instruction *WB = cast<Instruction>(
        Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {}));

    // SplitBlockAndInsertIfThen() returns the terminator of the new "then"
    // block; its parent is the block which will hold the initialization.
    BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();

    // Mark that the entry basic block of kernel K is split.
    KernelToInitBB[K] = NBB;

    return NBB;
  }

  // Within given kernel, initialize given LDS pointer to point to given LDS.
  // Does nothing if LDSPointer is already initialized within K.
  void initializeLDSPointer(Function *K, GlobalVariable *GV,
                            GlobalVariable *LDSPointer) {
    // If LDS pointer is already initialized within K, then nothing to do.
    auto PointerEntry = KernelToLDSPointers.insert(
        std::make_pair(K, SmallPtrSet<GlobalVariable *, 8>()));
    if (!PointerEntry.second)
      if (PointerEntry.first->second.contains(LDSPointer))
        return;

    // Insert instructions at EI which initialize LDS pointer to point-to LDS
    // within kernel K.
    //
    // That is, convert pointer type of GV to i16, and then store this converted
    // i16 value within LDSPointer which is of type i16*.
    //
    // EI is the first insertion point of the lane-0-only block returned by
    // activateLaneZero().
    auto *EI = &(*(activateLaneZero(K)->getFirstInsertionPt()));
    IRBuilder<> Builder(EI);
    Builder.CreateStore(Builder.CreatePtrToInt(GV, Type::getInt16Ty(Ctx)),
                        LDSPointer);

    // Mark that LDS pointer is initialized within kernel K.
    KernelToLDSPointers[K].insert(LDSPointer);
  }

  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
  // within all relevant kernels. Now replace all the uses of LDS within
  // non-kernel functions by LDS pointer.
  void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
    // Snapshot the users first: the loop below rewrites uses of GV, which
    // would otherwise modify the use list while it is being iterated.
    SmallVector<User *, 8> LDSUsers(GV->users());
    for (auto *U : LDSUsers) {
      // When `U` is a constant expression, it is possible that same constant
      // expression exists within multiple instructions, and within multiple
      // non-kernel functions. Collect all those non-kernel functions and all
      // those instructions within which `U` exist.
      auto FunctionToInsts =
          AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/);

      for (const auto &FunctionToInst : FunctionToInsts) {
        Function *F = FunctionToInst.first;
        auto &Insts = FunctionToInst.second;
        for (auto *I : Insts) {
          // If `U` is a constant expression, then we need to break the
          // associated instruction into a set of separate instructions by
          // converting constant expressions into instructions.
          SmallPtrSet<Instruction *, 8> UserInsts;

          if (U == I) {
            // `U` is an instruction, conversion from constant expression to
            // set of instructions is *not* required.
            UserInsts.insert(I);
          } else {
            // `U` is a constant expression, convert it into corresponding set
            // of instructions.
            auto *CE = cast<ConstantExpr>(U);
            convertConstantExprsToInstructions(I, CE, &UserInsts);
          }

          // Go through all the user instructions, if LDS exist within them as
          // an operand, then replace it by replace instruction.
          for (auto *II : UserInsts) {
            auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
            II->replaceUsesOfWith(GV, ReplaceInst);
          }
        }
      }
    }
  }

  // Create a set of replacement instructions which together replace LDS within
  // non-kernel function F by accessing LDS indirectly using LDS pointer.
  // The instructions are created at most once per (F, GV) pair; the resulting
  // value is cached in FunctionToLDSToReplaceInst and returned on later calls.
  Value *getReplacementInst(Function *F, GlobalVariable *GV,
                            GlobalVariable *LDSPointer) {
    // If the instruction which replaces LDS within F is already created, then
    // return it.
    auto LDSEntry = FunctionToLDSToReplaceInst.insert(
        std::make_pair(F, DenseMap<GlobalVariable *, Value *>()));
    if (!LDSEntry.second) {
      // F is already known; check whether GV has a cached replacement. A
      // successful insert leaves a nullptr placeholder, overwritten below.
      auto ReplaceInstEntry =
          LDSEntry.first->second.insert(std::make_pair(GV, nullptr));
      if (!ReplaceInstEntry.second)
        return ReplaceInstEntry.first->second;
    }

    // Get the instruction insertion point within the beginning of the entry
    // block of current non-kernel function.
    auto *EI = &(*(F->getEntryBlock().getFirstInsertionPt()));
    IRBuilder<> Builder(EI);

    // Insert required set of instructions which replace LDS within F:
    // load the i16 value from the LDS pointer, use it as a byte offset from
    // the null LDS base address, and cast the result back to the type of GV.
    auto *V = Builder.CreateBitCast(
        Builder.CreateGEP(
            Builder.getInt8Ty(), LDSMemBaseAddr,
            Builder.CreateLoad(LDSPointer->getValueType(), LDSPointer)),
        GV->getType());

    // Mark that the replacement instruction which replace LDS within F is
    // created.
    FunctionToLDSToReplaceInst[F][GV] = V;

    return V;
  }

public:
  // Binds the implementation to module M and builds the null i8 pointer in
  // the LOCAL address space which serves as the LDS base address.
  ReplaceLDSUseImpl(Module &M)
      : M(M), Ctx(M.getContext()), DL(M.getDataLayout()) {
    LDSMemBaseAddr = Constant::getIntegerValue(
        PointerType::get(Type::getInt8Ty(M.getContext()),
                         AMDGPUAS::LOCAL_ADDRESS),
        APInt(32, 0));
  }

  // Entry-point function which interfaces ReplaceLDSUseImpl with the outside
  // of the class. Returns true if the module was changed.
  bool replaceLDSUse();

private:
  // For a given LDS from collected LDS globals set, replace its non-kernel
  // function scope uses by pointer.
  bool replaceLDSUse(GlobalVariable *GV);
};

// For given LDS from collected LDS globals set, replace its non-kernel function
// scope uses by pointer. Returns true if an LDS pointer was created for GV
// (and hence the module was changed), false otherwise.
bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
  // Holds all those non-kernel functions within which LDS is being accessed.
  SmallPtrSet<Function *, 8> &LDSAccessors = LDSToNonKernels[GV];

  // The LDS pointer which points to LDS and replaces all the uses of LDS.
  GlobalVariable *LDSPointer = nullptr;

  // Traverse through each kernel K, check and if required, initialize the
  // LDS pointer to point to LDS within K.
  for (const auto &KernelToCallee : KernelToCallees) {
    Function *K = KernelToCallee.first;
    // Deliberate copy: set_intersect() below modifies its first argument, and
    // the callee set stored in the map must stay intact for other LDS globals.
    SmallPtrSet<Function *, 8> Callees = KernelToCallee.second;

    // Compute reachable and LDS used callees for kernel K.
    set_intersect(Callees, LDSAccessors);

    // None of the LDS accessing non-kernel functions are reachable from
    // kernel K. Hence, no need to initialize LDS pointer within kernel K.
    if (Callees.empty())
      continue;

    // We have found reachable and LDS used callees for kernel K, and we need to
    // initialize LDS pointer within kernel K, and we need to replace LDS use
    // within those callees by LDS pointer.
    //
    // But, first check if LDS pointer is already created, if not create one.
    LDSPointer = createLDSPointer(GV);

    // Initialize LDS pointer to point to LDS within kernel K.
    initializeLDSPointer(K, GV, LDSPointer);
  }

  // We have not found reachable and LDS used callees for any of the kernels,
  // and hence we have not created LDS pointer.
  if (!LDSPointer)
    return false;

  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
  // within all relevant kernels. Now replace all the uses of LDS within
  // non-kernel functions by LDS pointer.
  replaceLDSUseByPointer(GV, LDSPointer);

  return true;
}

namespace AMDGPU {

// A helper class for collecting all reachable callees for each kernel defined
// within the module.
class CollectReachableCallees {
  Module &M;
  // Call graph of M, built once in the constructor.
  CallGraph CG;
  // Call graph nodes of the defined non-kernel functions which are callees of
  // the external calling node; used as candidates for indirect call sites.
  SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;

  // Collect all address taken functions within the module.
  void collectAddressTakenFunctions() {
    auto *ECNode = CG.getExternalCallingNode();

    for (const auto &GI : *ECNode) {
      auto *CGN = GI.second;
      auto *F = CGN->getFunction();
      // Skip nodes without a function, declarations, and kernels.
      if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
        continue;
      AddressTakenFunctions.insert(CGN);
    }
  }

  // For given kernel, collect all its reachable non-kernel functions.
  SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
    SmallPtrSet<Function *, 8> ReachableCallees;

    // Call graph node which represents this kernel.
    auto *KCGN = CG[K];

    // Go through all call graph nodes reachable from the node representing this
    // kernel, visit all their call sites, if the call site is direct, add
    // corresponding callee to reachable callee set, if it is indirect, resolve
    // the indirect call site to potential reachable callees, add them to
    // reachable callee set, and repeat the process for the newly added
    // potential callee nodes.
    //
    // FIXME: Need to handle bit-casted function pointers.
    //
    SmallVector<CallGraphNode *, 8> CGNStack(depth_first(KCGN));
    SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
    while (!CGNStack.empty()) {
      auto *CGN = CGNStack.pop_back_val();

      // Each call graph node is processed at most once.
      if (!VisitedCGNodes.insert(CGN).second)
        continue;

      // Ignore call graph node which does not have associated function or
      // associated function is not a definition.
      if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
        continue;

      for (const auto &GI : *CGN) {
        // NOTE(review): assumes every call record of a defined function holds
        // a call instruction (getValue() on an empty record would be
        // invalid) - confirm against the CallGraph documentation.
        auto *RCB = cast<CallBase>(GI.first.getValue());
        auto *RCGN = GI.second;

        if (auto *DCallee = RCGN->getFunction()) {
          // The callee node has an associated function: direct callee.
          ReachableCallees.insert(DCallee);
        } else if (RCB->isIndirectCall()) {
          // Indirect call: every address taken function whose function type
          // matches the call site's is treated as a potential callee, and
          // everything reachable from it is queued for a visit as well.
          auto *RCBFTy = RCB->getFunctionType();
          for (auto *ACGN : AddressTakenFunctions) {
            auto *ACallee = ACGN->getFunction();
            if (ACallee->getFunctionType() == RCBFTy) {
              ReachableCallees.insert(ACallee);
              CGNStack.append(df_begin(ACGN), df_end(ACGN));
            }
          }
        }
      }
    }

    return ReachableCallees;
  }

public:
  // Builds the call graph of M and collects its address taken functions.
  explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
    // Collect address taken functions.
    collectAddressTakenFunctions();
  }

  // Fills KernelToCallees with one entry per kernel of the module; the entry
  // is created even when the reachable callee set turns out to be empty.
  void collectReachableCallees(
      DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
    // Collect reachable callee set for each kernel defined in the module.
    for (Function &F : M.functions()) {
      if (!llvm::AMDGPU::isKernelCC(&F))
        continue;
      Function *K = &F;
      KernelToCallees[K] = collectReachableCallees(K);
    }
  }
};

/// Collect reachable callees for each kernel defined in the module \p M and
/// return collected callees at \p KernelToCallees.
void collectReachableCallees(
    Module &M,
    DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
  CollectReachableCallees CRC{M};
  CRC.collectReachableCallees(KernelToCallees);
}

/// For the given LDS global \p GV, visit all its users and collect all
/// non-kernel functions within which \p GV is used and return collected list of
/// such non-kernel functions.
496 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) { 497 SmallPtrSet<Function *, 8> LDSAccessors; 498 SmallVector<User *, 8> UserStack(GV->users()); 499 SmallPtrSet<User *, 8> VisitedUsers; 500 501 while (!UserStack.empty()) { 502 auto *U = UserStack.pop_back_val(); 503 504 // `U` is already visited? continue to next one. 505 if (!VisitedUsers.insert(U).second) 506 continue; 507 508 // `U` is a global variable which is initialized with LDS. Ignore LDS. 509 if (isa<GlobalValue>(U)) 510 return SmallPtrSet<Function *, 8>(); 511 512 // Recursively explore constant users. 513 if (isa<Constant>(U)) { 514 append_range(UserStack, U->users()); 515 continue; 516 } 517 518 // `U` should be an instruction, if it belongs to a non-kernel function F, 519 // then collect F. 520 Function *F = cast<Instruction>(U)->getFunction(); 521 if (!llvm::AMDGPU::isKernelCC(F)) 522 LDSAccessors.insert(F); 523 } 524 525 return LDSAccessors; 526 } 527 528 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> 529 getFunctionToInstsMap(User *U, bool CollectKernelInsts) { 530 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts; 531 SmallVector<User *, 8> UserStack; 532 SmallPtrSet<User *, 8> VisitedUsers; 533 534 UserStack.push_back(U); 535 536 while (!UserStack.empty()) { 537 auto *UU = UserStack.pop_back_val(); 538 539 if (!VisitedUsers.insert(UU).second) 540 continue; 541 542 if (isa<GlobalValue>(UU)) 543 continue; 544 545 if (isa<Constant>(UU)) { 546 append_range(UserStack, UU->users()); 547 continue; 548 } 549 550 auto *I = cast<Instruction>(UU); 551 Function *F = I->getFunction(); 552 if (CollectKernelInsts) { 553 if (!llvm::AMDGPU::isKernelCC(F)) { 554 continue; 555 } 556 } else { 557 if (llvm::AMDGPU::isKernelCC(F)) { 558 continue; 559 } 560 } 561 562 FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>())); 563 FunctionToInsts[F].insert(I); 564 } 565 566 return FunctionToInsts; 567 } 568 569 } // namespace AMDGPU 570 571 // 
Entry-point function which interface ReplaceLDSUseImpl with outside of the 572 // class. 573 bool ReplaceLDSUseImpl::replaceLDSUse() { 574 // Collect LDS which requires their uses to be replaced by pointer. 575 std::vector<GlobalVariable *> LDSGlobals = 576 collectLDSRequiringPointerReplace(); 577 578 // No LDS to pointer-replace. Nothing to do. 579 if (LDSGlobals.empty()) 580 return false; 581 582 // Collect reachable callee set for each kernel defined in the module. 583 AMDGPU::collectReachableCallees(M, KernelToCallees); 584 585 if (KernelToCallees.empty()) { 586 // Either module does not have any kernel definitions, or none of the kernel 587 // has a call to non-kernel functions, or we could not resolve any of the 588 // call sites to proper non-kernel functions, because of the situations like 589 // inline asm calls. Nothing to replace. 590 return false; 591 } 592 593 // For every LDS from collected LDS globals set, replace its non-kernel 594 // function scope use by pointer. 595 bool Changed = false; 596 for (auto *GV : LDSGlobals) 597 Changed |= replaceLDSUse(GV); 598 599 return Changed; 600 } 601 602 class AMDGPUReplaceLDSUseWithPointer : public ModulePass { 603 public: 604 static char ID; 605 606 AMDGPUReplaceLDSUseWithPointer() : ModulePass(ID) { 607 initializeAMDGPUReplaceLDSUseWithPointerPass( 608 *PassRegistry::getPassRegistry()); 609 } 610 611 bool runOnModule(Module &M) override; 612 613 void getAnalysisUsage(AnalysisUsage &AU) const override { 614 AU.addRequired<TargetPassConfig>(); 615 } 616 }; 617 618 } // namespace 619 620 char AMDGPUReplaceLDSUseWithPointer::ID = 0; 621 char &llvm::AMDGPUReplaceLDSUseWithPointerID = 622 AMDGPUReplaceLDSUseWithPointer::ID; 623 624 INITIALIZE_PASS_BEGIN( 625 AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE, 626 "Replace within non-kernel function use of LDS with pointer", 627 false /*only look at the cfg*/, false /*analysis pass*/) 628 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 629 INITIALIZE_PASS_END( 630 
AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE, 631 "Replace within non-kernel function use of LDS with pointer", 632 false /*only look at the cfg*/, false /*analysis pass*/) 633 634 bool AMDGPUReplaceLDSUseWithPointer::runOnModule(Module &M) { 635 ReplaceLDSUseImpl LDSUseReplacer{M}; 636 return LDSUseReplacer.replaceLDSUse(); 637 } 638 639 ModulePass *llvm::createAMDGPUReplaceLDSUseWithPointerPass() { 640 return new AMDGPUReplaceLDSUseWithPointer(); 641 } 642 643 PreservedAnalyses 644 AMDGPUReplaceLDSUseWithPointerPass::run(Module &M, ModuleAnalysisManager &AM) { 645 ReplaceLDSUseImpl LDSUseReplacer{M}; 646 LDSUseReplacer.replaceLDSUse(); 647 return PreservedAnalyses::all(); 648 } 649