1 //===-- AMDGPUReplaceLDSUseWithPointer.cpp --------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass replaces all the uses of LDS within non-kernel functions by 10 // corresponding pointer counter-parts. 11 // 12 // The main motivation behind this pass is - to *avoid* subsequent LDS lowering 13 // pass from directly packing LDS (assume large LDS) into a struct type which 14 // would otherwise cause allocating huge memory for struct instance within every 15 // kernel. 16 // 17 // Brief sketch of the algorithm implemented in this pass is as below: 18 // 19 // 1. Collect all the LDS defined in the module which qualify for pointer 20 // replacement, say it is, LDSGlobals set. 21 // 22 // 2. Collect all the reachable callees for each kernel defined in the module, 23 // say it is, KernelToCallees map. 24 // 25 // 3. FOR (each global GV from LDSGlobals set) DO 26 // LDSUsedNonKernels = Collect all non-kernel functions which use GV. 27 // FOR (each kernel K in KernelToCallees map) DO 28 // ReachableCallees = KernelToCallees[K] 29 // ReachableAndLDSUsedCallees = 30 // SetIntersect(LDSUsedNonKernels, ReachableCallees) 31 // IF (ReachableAndLDSUsedCallees is not empty) THEN 32 // Pointer = Create a pointer to point-to GV if not created. 33 // Initialize Pointer to point-to GV within kernel K. 34 // ENDIF 35 // ENDFOR 36 // Replace all uses of GV within non kernel functions by Pointer. 37 // ENFOR 38 // 39 // LLVM IR example: 40 // 41 // Input IR: 42 // 43 // @lds = internal addrspace(3) global [4 x i32] undef, align 16 44 // 45 // define internal void @f0() { 46 // entry: 47 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds, 48 // i32 0, i32 0 49 // ret void 50 // } 51 // 52 // define protected amdgpu_kernel void @k0() { 53 // entry: 54 // call void @f0() 55 // ret void 56 // } 57 // 58 // Output IR: 59 // 60 // @lds = internal addrspace(3) global [4 x i32] undef, align 16 61 // @lds.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2 62 // 63 // define internal void @f0() { 64 // entry: 65 // %0 = load i16, i16 addrspace(3)* @lds.ptr, align 2 66 // %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0 67 // %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)* 68 // %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, 69 // i32 0, i32 0 70 // ret void 71 // } 72 // 73 // define protected amdgpu_kernel void @k0() { 74 // entry: 75 // store i16 ptrtoint ([4 x i32] addrspace(3)* @lds to i16), 76 // i16 addrspace(3)* @lds.ptr, align 2 77 // call void @f0() 78 // ret void 79 // } 80 // 81 //===----------------------------------------------------------------------===// 82 83 #include "AMDGPU.h" 84 #include "GCNSubtarget.h" 85 #include "Utils/AMDGPUBaseInfo.h" 86 #include "Utils/AMDGPULDSUtils.h" 87 #include "llvm/ADT/DenseMap.h" 88 #include "llvm/ADT/STLExtras.h" 89 #include "llvm/ADT/SetOperations.h" 90 #include "llvm/Analysis/CallGraph.h" 91 #include "llvm/CodeGen/TargetPassConfig.h" 92 #include "llvm/IR/Constants.h" 93 #include "llvm/IR/DerivedTypes.h" 94 #include "llvm/IR/IRBuilder.h" 95 #include "llvm/IR/InlineAsm.h" 96 #include "llvm/IR/Instructions.h" 97 #include "llvm/IR/IntrinsicsAMDGPU.h" 98 #include "llvm/IR/ReplaceConstant.h" 99 #include "llvm/InitializePasses.h" 100 #include "llvm/Pass.h" 101 #include "llvm/Support/Debug.h" 102 #include "llvm/Target/TargetMachine.h" 103 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 104 #include "llvm/Transforms/Utils/ModuleUtils.h" 105 #include <algorithm> 106 #include <vector> 107 108 #define DEBUG_TYPE "amdgpu-replace-lds-use-with-pointer" 109 110 using namespace llvm; 111 112 namespace { 113 114 namespace AMDGPU { 115 /// Collect all the instructions where user \p U belongs to. \p U could be 116 /// instruction itself or it could be a constant expression which is used within 117 /// an instruction. If \p CollectKernelInsts is true, collect instructions only 118 /// from kernels, otherwise collect instructions only from non-kernel functions. 119 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> 120 getFunctionToInstsMap(User *U, bool CollectKernelInsts); 121 122 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV); 123 124 } // namespace AMDGPU 125 126 class ReplaceLDSUseImpl { 127 Module &M; 128 LLVMContext &Ctx; 129 const DataLayout &DL; 130 Constant *LDSMemBaseAddr; 131 132 DenseMap<GlobalVariable *, GlobalVariable *> LDSToPointer; 133 DenseMap<GlobalVariable *, SmallPtrSet<Function *, 8>> LDSToNonKernels; 134 DenseMap<Function *, SmallPtrSet<Function *, 8>> KernelToCallees; 135 DenseMap<Function *, SmallPtrSet<GlobalVariable *, 8>> KernelToLDSPointers; 136 DenseMap<Function *, BasicBlock *> KernelToInitBB; 137 DenseMap<Function *, DenseMap<GlobalVariable *, Value *>> 138 FunctionToLDSToReplaceInst; 139 140 // Collect LDS which requires their uses to be replaced by pointer. 141 std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() { 142 // Collect LDS which requires module lowering. 143 std::vector<GlobalVariable *> LDSGlobals = 144 llvm::AMDGPU::findVariablesToLower(M); 145 146 // Remove LDS which don't qualify for replacement. 147 llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) { 148 return shouldIgnorePointerReplacement(GV); 149 }); 150 151 return LDSGlobals; 152 } 153 154 // Returns true if uses of given LDS global within non-kernel functions should 155 // be keep as it is without pointer replacement. 156 bool shouldIgnorePointerReplacement(GlobalVariable *GV) { 157 // LDS whose size is very small and doesn't exceed pointer size is not worth 158 // replacing. 159 if (DL.getTypeAllocSize(GV->getValueType()) <= 2) 160 return true; 161 162 // LDS which is not used from non-kernel function scope or it is used from 163 // global scope does not qualify for replacement. 164 LDSToNonKernels[GV] = AMDGPU::collectNonKernelAccessorsOfLDS(GV); 165 return LDSToNonKernels[GV].empty(); 166 167 // FIXME: When GV is used within all (or within most of the kernels), then 168 // it does not make sense to create a pointer for it. 169 } 170 171 // Insert new global LDS pointer which points to LDS. 172 GlobalVariable *createLDSPointer(GlobalVariable *GV) { 173 // LDS pointer which points to LDS is already created? Return it. 174 auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr)); 175 if (!PointerEntry.second) 176 return PointerEntry.first->second; 177 178 // We need to create new LDS pointer which points to LDS. 179 // 180 // Each CU owns at max 64K of LDS memory, so LDS address ranges from 0 to 181 // 2^16 - 1. Hence 16 bit pointer is enough to hold the LDS address. 182 auto *I16Ty = Type::getInt16Ty(Ctx); 183 GlobalVariable *LDSPointer = new GlobalVariable( 184 M, I16Ty, false, GlobalValue::InternalLinkage, UndefValue::get(I16Ty), 185 GV->getName() + Twine(".ptr"), nullptr, GlobalVariable::NotThreadLocal, 186 AMDGPUAS::LOCAL_ADDRESS); 187 188 LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 189 LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer)); 190 191 // Mark that an associated LDS pointer is created for LDS. 192 LDSToPointer[GV] = LDSPointer; 193 194 return LDSPointer; 195 } 196 197 // Split entry basic block in such a way that only lane 0 of each wave does 198 // the LDS pointer initialization, and return newly created basic block. 199 BasicBlock *activateLaneZero(Function *K) { 200 // If the entry basic block of kernel K is already split, then return 201 // newly created basic block. 202 auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr)); 203 if (!BasicBlockEntry.second) 204 return BasicBlockEntry.first->second; 205 206 // Split entry basic block of kernel K. 207 auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt())); 208 IRBuilder<> Builder(EI); 209 210 Value *Mbcnt = 211 Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, 212 {Builder.getInt32(-1), Builder.getInt32(0)}); 213 Value *Cond = Builder.CreateICmpEQ(Mbcnt, Builder.getInt32(0)); 214 Instruction *WB = cast<Instruction>( 215 Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {})); 216 217 BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent(); 218 219 // Mark that the entry basic block of kernel K is split. 220 KernelToInitBB[K] = NBB; 221 222 return NBB; 223 } 224 225 // Within given kernel, initialize given LDS pointer to point to given LDS. 226 void initializeLDSPointer(Function *K, GlobalVariable *GV, 227 GlobalVariable *LDSPointer) { 228 // If LDS pointer is already initialized within K, then nothing to do. 229 auto PointerEntry = KernelToLDSPointers.insert( 230 std::make_pair(K, SmallPtrSet<GlobalVariable *, 8>())); 231 if (!PointerEntry.second) 232 if (PointerEntry.first->second.contains(LDSPointer)) 233 return; 234 235 // Insert instructions at EI which initialize LDS pointer to point-to LDS 236 // within kernel K. 237 // 238 // That is, convert pointer type of GV to i16, and then store this converted 239 // i16 value within LDSPointer which is of type i16*. 240 auto *EI = &(*(activateLaneZero(K)->getFirstInsertionPt())); 241 IRBuilder<> Builder(EI); 242 Builder.CreateStore(Builder.CreatePtrToInt(GV, Type::getInt16Ty(Ctx)), 243 LDSPointer); 244 245 // Mark that LDS pointer is initialized within kernel K. 246 KernelToLDSPointers[K].insert(LDSPointer); 247 } 248 249 // We have created an LDS pointer for LDS, and initialized it to point-to LDS 250 // within all relevant kernels. Now replace all the uses of LDS within 251 // non-kernel functions by LDS pointer. 252 void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) { 253 SmallVector<User *, 8> LDSUsers(GV->users()); 254 for (auto *U : LDSUsers) { 255 // When `U` is a constant expression, it is possible that same constant 256 // expression exists within multiple instructions, and within multiple 257 // non-kernel functions. Collect all those non-kernel functions and all 258 // those instructions within which `U` exist. 259 auto FunctionToInsts = 260 AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/); 261 262 for (auto FI = FunctionToInsts.begin(), FE = FunctionToInsts.end(); 263 FI != FE; ++FI) { 264 Function *F = FI->first; 265 auto &Insts = FI->second; 266 for (auto *I : Insts) { 267 // If `U` is a constant expression, then we need to break the 268 // associated instruction into a set of separate instructions by 269 // converting constant expressions into instructions. 270 SmallPtrSet<Instruction *, 8> UserInsts; 271 272 if (U == I) { 273 // `U` is an instruction, conversion from constant expression to 274 // set of instructions is *not* required. 275 UserInsts.insert(I); 276 } else { 277 // `U` is a constant expression, convert it into corresponding set 278 // of instructions. 279 auto *CE = cast<ConstantExpr>(U); 280 convertConstantExprsToInstructions(I, CE, &UserInsts); 281 } 282 283 // Go through all the user instructions, if LDS exist within them as 284 // an operand, then replace it by replace instruction. 285 for (auto *II : UserInsts) { 286 auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer); 287 II->replaceUsesOfWith(GV, ReplaceInst); 288 } 289 } 290 } 291 } 292 } 293 294 // Create a set of replacement instructions which together replace LDS within 295 // non-kernel function F by accessing LDS indirectly using LDS pointer. 296 Value *getReplacementInst(Function *F, GlobalVariable *GV, 297 GlobalVariable *LDSPointer) { 298 // If the instruction which replaces LDS within F is already created, then 299 // return it. 300 auto LDSEntry = FunctionToLDSToReplaceInst.insert( 301 std::make_pair(F, DenseMap<GlobalVariable *, Value *>())); 302 if (!LDSEntry.second) { 303 auto ReplaceInstEntry = 304 LDSEntry.first->second.insert(std::make_pair(GV, nullptr)); 305 if (!ReplaceInstEntry.second) 306 return ReplaceInstEntry.first->second; 307 } 308 309 // Get the instruction insertion point within the beginning of the entry 310 // block of current non-kernel function. 311 auto *EI = &(*(F->getEntryBlock().getFirstInsertionPt())); 312 IRBuilder<> Builder(EI); 313 314 // Insert required set of instructions which replace LDS within F. 315 auto *V = Builder.CreateBitCast( 316 Builder.CreateGEP( 317 Builder.getInt8Ty(), LDSMemBaseAddr, 318 Builder.CreateLoad(LDSPointer->getValueType(), LDSPointer)), 319 GV->getType()); 320 321 // Mark that the replacement instruction which replace LDS within F is 322 // created. 323 FunctionToLDSToReplaceInst[F][GV] = V; 324 325 return V; 326 } 327 328 public: 329 ReplaceLDSUseImpl(Module &M) 330 : M(M), Ctx(M.getContext()), DL(M.getDataLayout()) { 331 LDSMemBaseAddr = Constant::getIntegerValue( 332 PointerType::get(Type::getInt8Ty(M.getContext()), 333 AMDGPUAS::LOCAL_ADDRESS), 334 APInt(32, 0)); 335 } 336 337 // Entry-point function which interface ReplaceLDSUseImpl with outside of the 338 // class. 339 bool replaceLDSUse(); 340 341 private: 342 // For a given LDS from collected LDS globals set, replace its non-kernel 343 // function scope uses by pointer. 344 bool replaceLDSUse(GlobalVariable *GV); 345 }; 346 347 // For given LDS from collected LDS globals set, replace its non-kernel function 348 // scope uses by pointer. 349 bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) { 350 // Holds all those non-kernel functions within which LDS is being accessed. 351 SmallPtrSet<Function *, 8> &LDSAccessors = LDSToNonKernels[GV]; 352 353 // The LDS pointer which points to LDS and replaces all the uses of LDS. 354 GlobalVariable *LDSPointer = nullptr; 355 356 // Traverse through each kernel K, check and if required, initialize the 357 // LDS pointer to point to LDS within K. 358 for (auto KI = KernelToCallees.begin(), KE = KernelToCallees.end(); KI != KE; 359 ++KI) { 360 Function *K = KI->first; 361 SmallPtrSet<Function *, 8> Callees = KI->second; 362 363 // Compute reachable and LDS used callees for kernel K. 364 set_intersect(Callees, LDSAccessors); 365 366 // None of the LDS accessing non-kernel functions are reachable from 367 // kernel K. Hence, no need to initialize LDS pointer within kernel K. 368 if (Callees.empty()) 369 continue; 370 371 // We have found reachable and LDS used callees for kernel K, and we need to 372 // initialize LDS pointer within kernel K, and we need to replace LDS use 373 // within those callees by LDS pointer. 374 // 375 // But, first check if LDS pointer is already created, if not create one. 376 LDSPointer = createLDSPointer(GV); 377 378 // Initialize LDS pointer to point to LDS within kernel K. 379 initializeLDSPointer(K, GV, LDSPointer); 380 } 381 382 // We have not found reachable and LDS used callees for any of the kernels, 383 // and hence we have not created LDS pointer. 384 if (!LDSPointer) 385 return false; 386 387 // We have created an LDS pointer for LDS, and initialized it to point-to LDS 388 // within all relevant kernels. Now replace all the uses of LDS within 389 // non-kernel functions by LDS pointer. 390 replaceLDSUseByPointer(GV, LDSPointer); 391 392 return true; 393 } 394 395 namespace AMDGPU { 396 397 // An helper class for collecting all reachable callees for each kernel defined 398 // within the module. 399 class CollectReachableCallees { 400 Module &M; 401 CallGraph CG; 402 SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions; 403 404 // Collect all address taken functions within the module. 405 void collectAddressTakenFunctions() { 406 auto *ECNode = CG.getExternalCallingNode(); 407 408 for (auto GI = ECNode->begin(), GE = ECNode->end(); GI != GE; ++GI) { 409 auto *CGN = GI->second; 410 auto *F = CGN->getFunction(); 411 if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F)) 412 continue; 413 AddressTakenFunctions.insert(CGN); 414 } 415 } 416 417 // For given kernel, collect all its reachable non-kernel functions. 418 SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) { 419 SmallPtrSet<Function *, 8> ReachableCallees; 420 421 // Call graph node which represents this kernel. 422 auto *KCGN = CG[K]; 423 424 // Go through all call graph nodes reachable from the node representing this 425 // kernel, visit all their call sites, if the call site is direct, add 426 // corresponding callee to reachable callee set, if it is indirect, resolve 427 // the indirect call site to potential reachable callees, add them to 428 // reachable callee set, and repeat the process for the newly added 429 // potential callee nodes. 430 // 431 // FIXME: Need to handle bit-casted function pointers. 432 // 433 SmallVector<CallGraphNode *, 8> CGNStack(df_begin(KCGN), df_end(KCGN)); 434 SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes; 435 while (!CGNStack.empty()) { 436 auto *CGN = CGNStack.pop_back_val(); 437 438 if (!VisitedCGNodes.insert(CGN).second) 439 continue; 440 441 // Ignore call graph node which does not have associated function or 442 // associated function is not a definition. 443 if (!CGN->getFunction() || CGN->getFunction()->isDeclaration()) 444 continue; 445 446 for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) { 447 auto *RCB = cast<CallBase>(GI->first.getValue()); 448 auto *RCGN = GI->second; 449 450 if (auto *DCallee = RCGN->getFunction()) { 451 ReachableCallees.insert(DCallee); 452 } else if (RCB->isIndirectCall()) { 453 auto *RCBFTy = RCB->getFunctionType(); 454 for (auto *ACGN : AddressTakenFunctions) { 455 auto *ACallee = ACGN->getFunction(); 456 if (ACallee->getFunctionType() == RCBFTy) { 457 ReachableCallees.insert(ACallee); 458 CGNStack.append(df_begin(ACGN), df_end(ACGN)); 459 } 460 } 461 } 462 } 463 } 464 465 return ReachableCallees; 466 } 467 468 public: 469 explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) { 470 // Collect address taken functions. 471 collectAddressTakenFunctions(); 472 } 473 474 void collectReachableCallees( 475 DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) { 476 // Collect reachable callee set for each kernel defined in the module. 477 for (Function &F : M.functions()) { 478 if (!llvm::AMDGPU::isKernelCC(&F)) 479 continue; 480 Function *K = &F; 481 KernelToCallees[K] = collectReachableCallees(K); 482 } 483 } 484 }; 485 486 /// Collect reachable callees for each kernel defined in the module \p M and 487 /// return collected callees at \p KernelToCallees. 488 void collectReachableCallees( 489 Module &M, 490 DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) { 491 CollectReachableCallees CRC{M}; 492 CRC.collectReachableCallees(KernelToCallees); 493 } 494 495 /// For the given LDS global \p GV, visit all its users and collect all 496 /// non-kernel functions within which \p GV is used and return collected list of 497 /// such non-kernel functions. 498 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) { 499 SmallPtrSet<Function *, 8> LDSAccessors; 500 SmallVector<User *, 8> UserStack(GV->users()); 501 SmallPtrSet<User *, 8> VisitedUsers; 502 503 while (!UserStack.empty()) { 504 auto *U = UserStack.pop_back_val(); 505 506 // `U` is already visited? continue to next one. 507 if (!VisitedUsers.insert(U).second) 508 continue; 509 510 // `U` is a global variable which is initialized with LDS. Ignore LDS. 511 if (isa<GlobalValue>(U)) 512 return SmallPtrSet<Function *, 8>(); 513 514 // Recursively explore constant users. 515 if (isa<Constant>(U)) { 516 append_range(UserStack, U->users()); 517 continue; 518 } 519 520 // `U` should be an instruction, if it belongs to a non-kernel function F, 521 // then collect F. 522 Function *F = cast<Instruction>(U)->getFunction(); 523 if (!llvm::AMDGPU::isKernelCC(F)) 524 LDSAccessors.insert(F); 525 } 526 527 return LDSAccessors; 528 } 529 530 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> 531 getFunctionToInstsMap(User *U, bool CollectKernelInsts) { 532 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts; 533 SmallVector<User *, 8> UserStack; 534 SmallPtrSet<User *, 8> VisitedUsers; 535 536 UserStack.push_back(U); 537 538 while (!UserStack.empty()) { 539 auto *UU = UserStack.pop_back_val(); 540 541 if (!VisitedUsers.insert(UU).second) 542 continue; 543 544 if (isa<GlobalValue>(UU)) 545 continue; 546 547 if (isa<Constant>(UU)) { 548 append_range(UserStack, UU->users()); 549 continue; 550 } 551 552 auto *I = cast<Instruction>(UU); 553 Function *F = I->getFunction(); 554 if (CollectKernelInsts) { 555 if (!llvm::AMDGPU::isKernelCC(F)) { 556 continue; 557 } 558 } else { 559 if (llvm::AMDGPU::isKernelCC(F)) { 560 continue; 561 } 562 } 563 564 FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>())); 565 FunctionToInsts[F].insert(I); 566 } 567 568 return FunctionToInsts; 569 } 570 571 } // namespace AMDGPU 572 573 // Entry-point function which interface ReplaceLDSUseImpl with outside of the 574 // class. 575 bool ReplaceLDSUseImpl::replaceLDSUse() { 576 // Collect LDS which requires their uses to be replaced by pointer. 577 std::vector<GlobalVariable *> LDSGlobals = 578 collectLDSRequiringPointerReplace(); 579 580 // No LDS to pointer-replace. Nothing to do. 581 if (LDSGlobals.empty()) 582 return false; 583 584 // Collect reachable callee set for each kernel defined in the module. 585 AMDGPU::collectReachableCallees(M, KernelToCallees); 586 587 if (KernelToCallees.empty()) { 588 // Either module does not have any kernel definitions, or none of the kernel 589 // has a call to non-kernel functions, or we could not resolve any of the 590 // call sites to proper non-kernel functions, because of the situations like 591 // inline asm calls. Nothing to replace. 592 return false; 593 } 594 595 // For every LDS from collected LDS globals set, replace its non-kernel 596 // function scope use by pointer. 597 bool Changed = false; 598 for (auto *GV : LDSGlobals) 599 Changed |= replaceLDSUse(GV); 600 601 return Changed; 602 } 603 604 class AMDGPUReplaceLDSUseWithPointer : public ModulePass { 605 public: 606 static char ID; 607 608 AMDGPUReplaceLDSUseWithPointer() : ModulePass(ID) { 609 initializeAMDGPUReplaceLDSUseWithPointerPass( 610 *PassRegistry::getPassRegistry()); 611 } 612 613 bool runOnModule(Module &M) override; 614 615 void getAnalysisUsage(AnalysisUsage &AU) const override { 616 AU.addRequired<TargetPassConfig>(); 617 } 618 }; 619 620 } // namespace 621 622 char AMDGPUReplaceLDSUseWithPointer::ID = 0; 623 char &llvm::AMDGPUReplaceLDSUseWithPointerID = 624 AMDGPUReplaceLDSUseWithPointer::ID; 625 626 INITIALIZE_PASS_BEGIN( 627 AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE, 628 "Replace within non-kernel function use of LDS with pointer", 629 false /*only look at the cfg*/, false /*analysis pass*/) 630 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 631 INITIALIZE_PASS_END( 632 AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE, 633 "Replace within non-kernel function use of LDS with pointer", 634 false /*only look at the cfg*/, false /*analysis pass*/) 635 636 bool AMDGPUReplaceLDSUseWithPointer::runOnModule(Module &M) { 637 ReplaceLDSUseImpl LDSUseReplacer{M}; 638 return LDSUseReplacer.replaceLDSUse(); 639 } 640 641 ModulePass *llvm::createAMDGPUReplaceLDSUseWithPointerPass() { 642 return new AMDGPUReplaceLDSUseWithPointer(); 643 } 644 645 PreservedAnalyses 646 AMDGPUReplaceLDSUseWithPointerPass::run(Module &M, ModuleAnalysisManager &AM) { 647 ReplaceLDSUseImpl LDSUseReplacer{M}; 648 LDSUseReplacer.replaceLDSUse(); 649 return PreservedAnalyses::all(); 650 } 651