180fd5fa5Shsmahesha //===-- AMDGPUReplaceLDSUseWithPointer.cpp --------------------------------===//
280fd5fa5Shsmahesha //
380fd5fa5Shsmahesha // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
480fd5fa5Shsmahesha // See https://llvm.org/LICENSE.txt for license information.
580fd5fa5Shsmahesha // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
680fd5fa5Shsmahesha //
780fd5fa5Shsmahesha //===----------------------------------------------------------------------===//
880fd5fa5Shsmahesha //
980fd5fa5Shsmahesha // This pass replaces all the uses of LDS within non-kernel functions by
1080fd5fa5Shsmahesha // corresponding pointer counter-parts.
1180fd5fa5Shsmahesha //
1280fd5fa5Shsmahesha // The main motivation behind this pass is - to *avoid* subsequent LDS lowering
1380fd5fa5Shsmahesha // pass from directly packing LDS (assume large LDS) into a struct type which
1480fd5fa5Shsmahesha // would otherwise cause allocating huge memory for struct instance within every
1580fd5fa5Shsmahesha // kernel.
1680fd5fa5Shsmahesha //
1780fd5fa5Shsmahesha // Brief sketch of the algorithm implemented in this pass is as below:
1880fd5fa5Shsmahesha //
1980fd5fa5Shsmahesha //   1. Collect all the LDS defined in the module which qualify for pointer
2080fd5fa5Shsmahesha //      replacement, say it is, LDSGlobals set.
2180fd5fa5Shsmahesha //
2280fd5fa5Shsmahesha //   2. Collect all the reachable callees for each kernel defined in the module,
2380fd5fa5Shsmahesha //      say it is, KernelToCallees map.
2480fd5fa5Shsmahesha //
2580fd5fa5Shsmahesha //   3. FOR (each global GV from LDSGlobals set) DO
2680fd5fa5Shsmahesha //        LDSUsedNonKernels = Collect all non-kernel functions which use GV.
2780fd5fa5Shsmahesha //        FOR (each kernel K in KernelToCallees map) DO
2880fd5fa5Shsmahesha //           ReachableCallees = KernelToCallees[K]
2980fd5fa5Shsmahesha //           ReachableAndLDSUsedCallees =
3080fd5fa5Shsmahesha //              SetIntersect(LDSUsedNonKernels, ReachableCallees)
3180fd5fa5Shsmahesha //           IF (ReachableAndLDSUsedCallees is not empty) THEN
3280fd5fa5Shsmahesha //             Pointer = Create a pointer to point-to GV if not created.
3380fd5fa5Shsmahesha //             Initialize Pointer to point-to GV within kernel K.
3480fd5fa5Shsmahesha //           ENDIF
3580fd5fa5Shsmahesha //        ENDFOR
3680fd5fa5Shsmahesha //        Replace all uses of GV within non kernel functions by Pointer.
//      ENDFOR
3880fd5fa5Shsmahesha //
3980fd5fa5Shsmahesha // LLVM IR example:
4080fd5fa5Shsmahesha //
4180fd5fa5Shsmahesha //    Input IR:
4280fd5fa5Shsmahesha //
4380fd5fa5Shsmahesha //    @lds = internal addrspace(3) global [4 x i32] undef, align 16
4480fd5fa5Shsmahesha //
4580fd5fa5Shsmahesha //    define internal void @f0() {
4680fd5fa5Shsmahesha //    entry:
4780fd5fa5Shsmahesha //      %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds,
4880fd5fa5Shsmahesha //             i32 0, i32 0
4980fd5fa5Shsmahesha //      ret void
5080fd5fa5Shsmahesha //    }
5180fd5fa5Shsmahesha //
5280fd5fa5Shsmahesha //    define protected amdgpu_kernel void @k0() {
5380fd5fa5Shsmahesha //    entry:
5480fd5fa5Shsmahesha //      call void @f0()
5580fd5fa5Shsmahesha //      ret void
5680fd5fa5Shsmahesha //    }
5780fd5fa5Shsmahesha //
5880fd5fa5Shsmahesha //    Output IR:
5980fd5fa5Shsmahesha //
6080fd5fa5Shsmahesha //    @lds = internal addrspace(3) global [4 x i32] undef, align 16
6180fd5fa5Shsmahesha //    @lds.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
6280fd5fa5Shsmahesha //
6380fd5fa5Shsmahesha //    define internal void @f0() {
6480fd5fa5Shsmahesha //    entry:
6580fd5fa5Shsmahesha //      %0 = load i16, i16 addrspace(3)* @lds.ptr, align 2
6680fd5fa5Shsmahesha //      %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
6780fd5fa5Shsmahesha //      %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
6880fd5fa5Shsmahesha //      %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2,
6980fd5fa5Shsmahesha //             i32 0, i32 0
7080fd5fa5Shsmahesha //      ret void
7180fd5fa5Shsmahesha //    }
7280fd5fa5Shsmahesha //
7380fd5fa5Shsmahesha //    define protected amdgpu_kernel void @k0() {
7480fd5fa5Shsmahesha //    entry:
7580fd5fa5Shsmahesha //      store i16 ptrtoint ([4 x i32] addrspace(3)* @lds to i16),
7680fd5fa5Shsmahesha //            i16 addrspace(3)* @lds.ptr, align 2
7780fd5fa5Shsmahesha //      call void @f0()
7880fd5fa5Shsmahesha //      ret void
7980fd5fa5Shsmahesha //    }
8080fd5fa5Shsmahesha //
8180fd5fa5Shsmahesha //===----------------------------------------------------------------------===//
8280fd5fa5Shsmahesha 
8380fd5fa5Shsmahesha #include "AMDGPU.h"
8480fd5fa5Shsmahesha #include "GCNSubtarget.h"
8580fd5fa5Shsmahesha #include "Utils/AMDGPUBaseInfo.h"
86c7eb8463SStanislav Mekhanoshin #include "Utils/AMDGPUMemoryUtils.h"
8780fd5fa5Shsmahesha #include "llvm/ADT/DenseMap.h"
8880fd5fa5Shsmahesha #include "llvm/ADT/STLExtras.h"
8980fd5fa5Shsmahesha #include "llvm/ADT/SetOperations.h"
90f0e3b39aSJon Chesterfield #include "llvm/Analysis/CallGraph.h"
9180fd5fa5Shsmahesha #include "llvm/CodeGen/TargetPassConfig.h"
9280fd5fa5Shsmahesha #include "llvm/IR/Constants.h"
9380fd5fa5Shsmahesha #include "llvm/IR/DerivedTypes.h"
9480fd5fa5Shsmahesha #include "llvm/IR/IRBuilder.h"
9580fd5fa5Shsmahesha #include "llvm/IR/InlineAsm.h"
9680fd5fa5Shsmahesha #include "llvm/IR/Instructions.h"
9780fd5fa5Shsmahesha #include "llvm/IR/IntrinsicsAMDGPU.h"
9880fd5fa5Shsmahesha #include "llvm/IR/ReplaceConstant.h"
9980fd5fa5Shsmahesha #include "llvm/InitializePasses.h"
10080fd5fa5Shsmahesha #include "llvm/Pass.h"
10180fd5fa5Shsmahesha #include "llvm/Support/Debug.h"
10280fd5fa5Shsmahesha #include "llvm/Target/TargetMachine.h"
10380fd5fa5Shsmahesha #include "llvm/Transforms/Utils/BasicBlockUtils.h"
10480fd5fa5Shsmahesha #include "llvm/Transforms/Utils/ModuleUtils.h"
10580fd5fa5Shsmahesha #include <algorithm>
10680fd5fa5Shsmahesha #include <vector>
10780fd5fa5Shsmahesha 
10880fd5fa5Shsmahesha #define DEBUG_TYPE "amdgpu-replace-lds-use-with-pointer"
10980fd5fa5Shsmahesha 
11080fd5fa5Shsmahesha using namespace llvm;
11180fd5fa5Shsmahesha 
11280fd5fa5Shsmahesha namespace {
11380fd5fa5Shsmahesha 
114f0e3b39aSJon Chesterfield namespace AMDGPU {
115f0e3b39aSJon Chesterfield /// Collect all the instructions where user \p U belongs to. \p U could be
116f0e3b39aSJon Chesterfield /// instruction itself or it could be a constant expression which is used within
117f0e3b39aSJon Chesterfield /// an instruction. If \p CollectKernelInsts is true, collect instructions only
118f0e3b39aSJon Chesterfield /// from kernels, otherwise collect instructions only from non-kernel functions.
119f0e3b39aSJon Chesterfield DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
120f0e3b39aSJon Chesterfield getFunctionToInstsMap(User *U, bool CollectKernelInsts);
121f0e3b39aSJon Chesterfield 
122f0e3b39aSJon Chesterfield SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
123f0e3b39aSJon Chesterfield 
124f0e3b39aSJon Chesterfield } // namespace AMDGPU
125f0e3b39aSJon Chesterfield 
12680fd5fa5Shsmahesha class ReplaceLDSUseImpl {
12780fd5fa5Shsmahesha   Module &M;
12880fd5fa5Shsmahesha   LLVMContext &Ctx;
12980fd5fa5Shsmahesha   const DataLayout &DL;
13080fd5fa5Shsmahesha   Constant *LDSMemBaseAddr;
13180fd5fa5Shsmahesha 
13280fd5fa5Shsmahesha   DenseMap<GlobalVariable *, GlobalVariable *> LDSToPointer;
13380fd5fa5Shsmahesha   DenseMap<GlobalVariable *, SmallPtrSet<Function *, 8>> LDSToNonKernels;
13480fd5fa5Shsmahesha   DenseMap<Function *, SmallPtrSet<Function *, 8>> KernelToCallees;
13580fd5fa5Shsmahesha   DenseMap<Function *, SmallPtrSet<GlobalVariable *, 8>> KernelToLDSPointers;
13680fd5fa5Shsmahesha   DenseMap<Function *, BasicBlock *> KernelToInitBB;
13780fd5fa5Shsmahesha   DenseMap<Function *, DenseMap<GlobalVariable *, Value *>>
13880fd5fa5Shsmahesha       FunctionToLDSToReplaceInst;
13980fd5fa5Shsmahesha 
14080fd5fa5Shsmahesha   // Collect LDS which requires their uses to be replaced by pointer.
collectLDSRequiringPointerReplace()14180fd5fa5Shsmahesha   std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
14280fd5fa5Shsmahesha     // Collect LDS which requires module lowering.
143f0e3b39aSJon Chesterfield     std::vector<GlobalVariable *> LDSGlobals =
144*2224bbcdSJon Chesterfield         llvm::AMDGPU::findVariablesToLower(M, nullptr);
14580fd5fa5Shsmahesha 
14680fd5fa5Shsmahesha     // Remove LDS which don't qualify for replacement.
1478568ca78SKazu Hirata     llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
14880fd5fa5Shsmahesha       return shouldIgnorePointerReplacement(GV);
1498568ca78SKazu Hirata     });
15080fd5fa5Shsmahesha 
15180fd5fa5Shsmahesha     return LDSGlobals;
15280fd5fa5Shsmahesha   }
15380fd5fa5Shsmahesha 
15480fd5fa5Shsmahesha   // Returns true if uses of given LDS global within non-kernel functions should
15580fd5fa5Shsmahesha   // be keep as it is without pointer replacement.
shouldIgnorePointerReplacement(GlobalVariable * GV)15680fd5fa5Shsmahesha   bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
157dc6e8dfdSJacob Lambert     // LDS whose size is very small and doesn't exceed pointer size is not worth
15880fd5fa5Shsmahesha     // replacing.
15980fd5fa5Shsmahesha     if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
16080fd5fa5Shsmahesha       return true;
16180fd5fa5Shsmahesha 
16280fd5fa5Shsmahesha     // LDS which is not used from non-kernel function scope or it is used from
16380fd5fa5Shsmahesha     // global scope does not qualify for replacement.
16480fd5fa5Shsmahesha     LDSToNonKernels[GV] = AMDGPU::collectNonKernelAccessorsOfLDS(GV);
16580fd5fa5Shsmahesha     return LDSToNonKernels[GV].empty();
16680fd5fa5Shsmahesha 
16780fd5fa5Shsmahesha     // FIXME: When GV is used within all (or within most of the kernels), then
16880fd5fa5Shsmahesha     // it does not make sense to create a pointer for it.
16980fd5fa5Shsmahesha   }
17080fd5fa5Shsmahesha 
17180fd5fa5Shsmahesha   // Insert new global LDS pointer which points to LDS.
createLDSPointer(GlobalVariable * GV)17280fd5fa5Shsmahesha   GlobalVariable *createLDSPointer(GlobalVariable *GV) {
173dc6e8dfdSJacob Lambert     // LDS pointer which points to LDS is already created? Return it.
17480fd5fa5Shsmahesha     auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
17580fd5fa5Shsmahesha     if (!PointerEntry.second)
17680fd5fa5Shsmahesha       return PointerEntry.first->second;
17780fd5fa5Shsmahesha 
17880fd5fa5Shsmahesha     // We need to create new LDS pointer which points to LDS.
17980fd5fa5Shsmahesha     //
18080fd5fa5Shsmahesha     // Each CU owns at max 64K of LDS memory, so LDS address ranges from 0 to
18180fd5fa5Shsmahesha     // 2^16 - 1. Hence 16 bit pointer is enough to hold the LDS address.
18280fd5fa5Shsmahesha     auto *I16Ty = Type::getInt16Ty(Ctx);
18380fd5fa5Shsmahesha     GlobalVariable *LDSPointer = new GlobalVariable(
18480fd5fa5Shsmahesha         M, I16Ty, false, GlobalValue::InternalLinkage, UndefValue::get(I16Ty),
18580fd5fa5Shsmahesha         GV->getName() + Twine(".ptr"), nullptr, GlobalVariable::NotThreadLocal,
18680fd5fa5Shsmahesha         AMDGPUAS::LOCAL_ADDRESS);
18780fd5fa5Shsmahesha 
18880fd5fa5Shsmahesha     LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
189f0e3b39aSJon Chesterfield     LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));
19080fd5fa5Shsmahesha 
19180fd5fa5Shsmahesha     // Mark that an associated LDS pointer is created for LDS.
19280fd5fa5Shsmahesha     LDSToPointer[GV] = LDSPointer;
19380fd5fa5Shsmahesha 
19480fd5fa5Shsmahesha     return LDSPointer;
19580fd5fa5Shsmahesha   }
19680fd5fa5Shsmahesha 
19780fd5fa5Shsmahesha   // Split entry basic block in such a way that only lane 0 of each wave does
19880fd5fa5Shsmahesha   // the LDS pointer initialization, and return newly created basic block.
activateLaneZero(Function * K)19980fd5fa5Shsmahesha   BasicBlock *activateLaneZero(Function *K) {
200dc6e8dfdSJacob Lambert     // If the entry basic block of kernel K is already split, then return
20180fd5fa5Shsmahesha     // newly created basic block.
20280fd5fa5Shsmahesha     auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
20380fd5fa5Shsmahesha     if (!BasicBlockEntry.second)
20480fd5fa5Shsmahesha       return BasicBlockEntry.first->second;
20580fd5fa5Shsmahesha 
2060c288140Shsmahesha     // Split entry basic block of kernel K.
2070c288140Shsmahesha     auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt()));
2080c288140Shsmahesha     IRBuilder<> Builder(EI);
20980fd5fa5Shsmahesha 
21080fd5fa5Shsmahesha     Value *Mbcnt =
21180fd5fa5Shsmahesha         Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
21280fd5fa5Shsmahesha                                 {Builder.getInt32(-1), Builder.getInt32(0)});
21380fd5fa5Shsmahesha     Value *Cond = Builder.CreateICmpEQ(Mbcnt, Builder.getInt32(0));
21480fd5fa5Shsmahesha     Instruction *WB = cast<Instruction>(
21580fd5fa5Shsmahesha         Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {}));
21680fd5fa5Shsmahesha 
21780fd5fa5Shsmahesha     BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
21880fd5fa5Shsmahesha 
219dc6e8dfdSJacob Lambert     // Mark that the entry basic block of kernel K is split.
22080fd5fa5Shsmahesha     KernelToInitBB[K] = NBB;
22180fd5fa5Shsmahesha 
22280fd5fa5Shsmahesha     return NBB;
22380fd5fa5Shsmahesha   }
22480fd5fa5Shsmahesha 
22580fd5fa5Shsmahesha   // Within given kernel, initialize given LDS pointer to point to given LDS.
initializeLDSPointer(Function * K,GlobalVariable * GV,GlobalVariable * LDSPointer)22680fd5fa5Shsmahesha   void initializeLDSPointer(Function *K, GlobalVariable *GV,
22780fd5fa5Shsmahesha                             GlobalVariable *LDSPointer) {
22880fd5fa5Shsmahesha     // If LDS pointer is already initialized within K, then nothing to do.
22980fd5fa5Shsmahesha     auto PointerEntry = KernelToLDSPointers.insert(
23080fd5fa5Shsmahesha         std::make_pair(K, SmallPtrSet<GlobalVariable *, 8>()));
23180fd5fa5Shsmahesha     if (!PointerEntry.second)
23280fd5fa5Shsmahesha       if (PointerEntry.first->second.contains(LDSPointer))
23380fd5fa5Shsmahesha         return;
23480fd5fa5Shsmahesha 
23580fd5fa5Shsmahesha     // Insert instructions at EI which initialize LDS pointer to point-to LDS
23680fd5fa5Shsmahesha     // within kernel K.
23780fd5fa5Shsmahesha     //
23880fd5fa5Shsmahesha     // That is, convert pointer type of GV to i16, and then store this converted
23980fd5fa5Shsmahesha     // i16 value within LDSPointer which is of type i16*.
24080fd5fa5Shsmahesha     auto *EI = &(*(activateLaneZero(K)->getFirstInsertionPt()));
24180fd5fa5Shsmahesha     IRBuilder<> Builder(EI);
24280fd5fa5Shsmahesha     Builder.CreateStore(Builder.CreatePtrToInt(GV, Type::getInt16Ty(Ctx)),
24380fd5fa5Shsmahesha                         LDSPointer);
24480fd5fa5Shsmahesha 
24580fd5fa5Shsmahesha     // Mark that LDS pointer is initialized within kernel K.
24680fd5fa5Shsmahesha     KernelToLDSPointers[K].insert(LDSPointer);
24780fd5fa5Shsmahesha   }
24880fd5fa5Shsmahesha 
24980fd5fa5Shsmahesha   // We have created an LDS pointer for LDS, and initialized it to point-to LDS
250dc6e8dfdSJacob Lambert   // within all relevant kernels. Now replace all the uses of LDS within
25180fd5fa5Shsmahesha   // non-kernel functions by LDS pointer.
replaceLDSUseByPointer(GlobalVariable * GV,GlobalVariable * LDSPointer)25280fd5fa5Shsmahesha   void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
25380fd5fa5Shsmahesha     SmallVector<User *, 8> LDSUsers(GV->users());
25480fd5fa5Shsmahesha     for (auto *U : LDSUsers) {
25580fd5fa5Shsmahesha       // When `U` is a constant expression, it is possible that same constant
25680fd5fa5Shsmahesha       // expression exists within multiple instructions, and within multiple
25780fd5fa5Shsmahesha       // non-kernel functions. Collect all those non-kernel functions and all
25880fd5fa5Shsmahesha       // those instructions within which `U` exist.
25980fd5fa5Shsmahesha       auto FunctionToInsts =
26080fd5fa5Shsmahesha           AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/);
26180fd5fa5Shsmahesha 
262d395befaSKazu Hirata       for (const auto &FunctionToInst : FunctionToInsts) {
263d395befaSKazu Hirata         Function *F = FunctionToInst.first;
264d395befaSKazu Hirata         auto &Insts = FunctionToInst.second;
26580fd5fa5Shsmahesha         for (auto *I : Insts) {
26680fd5fa5Shsmahesha           // If `U` is a constant expression, then we need to break the
26780fd5fa5Shsmahesha           // associated instruction into a set of separate instructions by
26880fd5fa5Shsmahesha           // converting constant expressions into instructions.
26980fd5fa5Shsmahesha           SmallPtrSet<Instruction *, 8> UserInsts;
27080fd5fa5Shsmahesha 
27180fd5fa5Shsmahesha           if (U == I) {
27280fd5fa5Shsmahesha             // `U` is an instruction, conversion from constant expression to
27380fd5fa5Shsmahesha             // set of instructions is *not* required.
27480fd5fa5Shsmahesha             UserInsts.insert(I);
27580fd5fa5Shsmahesha           } else {
27680fd5fa5Shsmahesha             // `U` is a constant expression, convert it into corresponding set
27780fd5fa5Shsmahesha             // of instructions.
27880fd5fa5Shsmahesha             auto *CE = cast<ConstantExpr>(U);
27980fd5fa5Shsmahesha             convertConstantExprsToInstructions(I, CE, &UserInsts);
28080fd5fa5Shsmahesha           }
28180fd5fa5Shsmahesha 
282dc6e8dfdSJacob Lambert           // Go through all the user instructions, if LDS exist within them as
283dc6e8dfdSJacob Lambert           // an operand, then replace it by replace instruction.
28480fd5fa5Shsmahesha           for (auto *II : UserInsts) {
28580fd5fa5Shsmahesha             auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
28680fd5fa5Shsmahesha             II->replaceUsesOfWith(GV, ReplaceInst);
28780fd5fa5Shsmahesha           }
28880fd5fa5Shsmahesha         }
28980fd5fa5Shsmahesha       }
29080fd5fa5Shsmahesha     }
29180fd5fa5Shsmahesha   }
29280fd5fa5Shsmahesha 
29380fd5fa5Shsmahesha   // Create a set of replacement instructions which together replace LDS within
29480fd5fa5Shsmahesha   // non-kernel function F by accessing LDS indirectly using LDS pointer.
getReplacementInst(Function * F,GlobalVariable * GV,GlobalVariable * LDSPointer)29580fd5fa5Shsmahesha   Value *getReplacementInst(Function *F, GlobalVariable *GV,
29680fd5fa5Shsmahesha                             GlobalVariable *LDSPointer) {
29780fd5fa5Shsmahesha     // If the instruction which replaces LDS within F is already created, then
29880fd5fa5Shsmahesha     // return it.
29980fd5fa5Shsmahesha     auto LDSEntry = FunctionToLDSToReplaceInst.insert(
30080fd5fa5Shsmahesha         std::make_pair(F, DenseMap<GlobalVariable *, Value *>()));
30180fd5fa5Shsmahesha     if (!LDSEntry.second) {
30280fd5fa5Shsmahesha       auto ReplaceInstEntry =
30380fd5fa5Shsmahesha           LDSEntry.first->second.insert(std::make_pair(GV, nullptr));
30480fd5fa5Shsmahesha       if (!ReplaceInstEntry.second)
30580fd5fa5Shsmahesha         return ReplaceInstEntry.first->second;
30680fd5fa5Shsmahesha     }
30780fd5fa5Shsmahesha 
30880fd5fa5Shsmahesha     // Get the instruction insertion point within the beginning of the entry
30980fd5fa5Shsmahesha     // block of current non-kernel function.
31080fd5fa5Shsmahesha     auto *EI = &(*(F->getEntryBlock().getFirstInsertionPt()));
31180fd5fa5Shsmahesha     IRBuilder<> Builder(EI);
31280fd5fa5Shsmahesha 
31380fd5fa5Shsmahesha     // Insert required set of instructions which replace LDS within F.
31480fd5fa5Shsmahesha     auto *V = Builder.CreateBitCast(
31580fd5fa5Shsmahesha         Builder.CreateGEP(
3162c68ecccSNikita Popov             Builder.getInt8Ty(), LDSMemBaseAddr,
31780fd5fa5Shsmahesha             Builder.CreateLoad(LDSPointer->getValueType(), LDSPointer)),
31880fd5fa5Shsmahesha         GV->getType());
31980fd5fa5Shsmahesha 
32080fd5fa5Shsmahesha     // Mark that the replacement instruction which replace LDS within F is
32180fd5fa5Shsmahesha     // created.
32280fd5fa5Shsmahesha     FunctionToLDSToReplaceInst[F][GV] = V;
32380fd5fa5Shsmahesha 
32480fd5fa5Shsmahesha     return V;
32580fd5fa5Shsmahesha   }
32680fd5fa5Shsmahesha 
32780fd5fa5Shsmahesha public:
ReplaceLDSUseImpl(Module & M)32880fd5fa5Shsmahesha   ReplaceLDSUseImpl(Module &M)
32980fd5fa5Shsmahesha       : M(M), Ctx(M.getContext()), DL(M.getDataLayout()) {
33080fd5fa5Shsmahesha     LDSMemBaseAddr = Constant::getIntegerValue(
33180fd5fa5Shsmahesha         PointerType::get(Type::getInt8Ty(M.getContext()),
33280fd5fa5Shsmahesha                          AMDGPUAS::LOCAL_ADDRESS),
33380fd5fa5Shsmahesha         APInt(32, 0));
33480fd5fa5Shsmahesha   }
33580fd5fa5Shsmahesha 
33680fd5fa5Shsmahesha   // Entry-point function which interface ReplaceLDSUseImpl with outside of the
33780fd5fa5Shsmahesha   // class.
33880fd5fa5Shsmahesha   bool replaceLDSUse();
33980fd5fa5Shsmahesha 
34080fd5fa5Shsmahesha private:
34180fd5fa5Shsmahesha   // For a given LDS from collected LDS globals set, replace its non-kernel
34280fd5fa5Shsmahesha   // function scope uses by pointer.
34380fd5fa5Shsmahesha   bool replaceLDSUse(GlobalVariable *GV);
34480fd5fa5Shsmahesha };
34580fd5fa5Shsmahesha 
34680fd5fa5Shsmahesha // For given LDS from collected LDS globals set, replace its non-kernel function
34780fd5fa5Shsmahesha // scope uses by pointer.
replaceLDSUse(GlobalVariable * GV)34880fd5fa5Shsmahesha bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
34980fd5fa5Shsmahesha   // Holds all those non-kernel functions within which LDS is being accessed.
35080fd5fa5Shsmahesha   SmallPtrSet<Function *, 8> &LDSAccessors = LDSToNonKernels[GV];
35180fd5fa5Shsmahesha 
35280fd5fa5Shsmahesha   // The LDS pointer which points to LDS and replaces all the uses of LDS.
35380fd5fa5Shsmahesha   GlobalVariable *LDSPointer = nullptr;
35480fd5fa5Shsmahesha 
35580fd5fa5Shsmahesha   // Traverse through each kernel K, check and if required, initialize the
35680fd5fa5Shsmahesha   // LDS pointer to point to LDS within K.
357d395befaSKazu Hirata   for (const auto &KernelToCallee : KernelToCallees) {
358d395befaSKazu Hirata     Function *K = KernelToCallee.first;
359d395befaSKazu Hirata     SmallPtrSet<Function *, 8> Callees = KernelToCallee.second;
36080fd5fa5Shsmahesha 
36180fd5fa5Shsmahesha     // Compute reachable and LDS used callees for kernel K.
36280fd5fa5Shsmahesha     set_intersect(Callees, LDSAccessors);
36380fd5fa5Shsmahesha 
36480fd5fa5Shsmahesha     // None of the LDS accessing non-kernel functions are reachable from
36580fd5fa5Shsmahesha     // kernel K. Hence, no need to initialize LDS pointer within kernel K.
36680fd5fa5Shsmahesha     if (Callees.empty())
36780fd5fa5Shsmahesha       continue;
36880fd5fa5Shsmahesha 
36980fd5fa5Shsmahesha     // We have found reachable and LDS used callees for kernel K, and we need to
37080fd5fa5Shsmahesha     // initialize LDS pointer within kernel K, and we need to replace LDS use
37180fd5fa5Shsmahesha     // within those callees by LDS pointer.
37280fd5fa5Shsmahesha     //
37380fd5fa5Shsmahesha     // But, first check if LDS pointer is already created, if not create one.
37480fd5fa5Shsmahesha     LDSPointer = createLDSPointer(GV);
37580fd5fa5Shsmahesha 
37680fd5fa5Shsmahesha     // Initialize LDS pointer to point to LDS within kernel K.
37780fd5fa5Shsmahesha     initializeLDSPointer(K, GV, LDSPointer);
37880fd5fa5Shsmahesha   }
37980fd5fa5Shsmahesha 
38080fd5fa5Shsmahesha   // We have not found reachable and LDS used callees for any of the kernels,
38180fd5fa5Shsmahesha   // and hence we have not created LDS pointer.
38280fd5fa5Shsmahesha   if (!LDSPointer)
38380fd5fa5Shsmahesha     return false;
38480fd5fa5Shsmahesha 
38580fd5fa5Shsmahesha   // We have created an LDS pointer for LDS, and initialized it to point-to LDS
386dc6e8dfdSJacob Lambert   // within all relevant kernels. Now replace all the uses of LDS within
38780fd5fa5Shsmahesha   // non-kernel functions by LDS pointer.
38880fd5fa5Shsmahesha   replaceLDSUseByPointer(GV, LDSPointer);
38980fd5fa5Shsmahesha 
39080fd5fa5Shsmahesha   return true;
39180fd5fa5Shsmahesha }
39280fd5fa5Shsmahesha 
393f0e3b39aSJon Chesterfield namespace AMDGPU {
394f0e3b39aSJon Chesterfield 
395f0e3b39aSJon Chesterfield // An helper class for collecting all reachable callees for each kernel defined
396f0e3b39aSJon Chesterfield // within the module.
397f0e3b39aSJon Chesterfield class CollectReachableCallees {
398f0e3b39aSJon Chesterfield   Module &M;
399f0e3b39aSJon Chesterfield   CallGraph CG;
400f0e3b39aSJon Chesterfield   SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
401f0e3b39aSJon Chesterfield 
402f0e3b39aSJon Chesterfield   // Collect all address taken functions within the module.
collectAddressTakenFunctions()403f0e3b39aSJon Chesterfield   void collectAddressTakenFunctions() {
404f0e3b39aSJon Chesterfield     auto *ECNode = CG.getExternalCallingNode();
405f0e3b39aSJon Chesterfield 
40667aeae01SKazu Hirata     for (const auto &GI : *ECNode) {
40767aeae01SKazu Hirata       auto *CGN = GI.second;
408f0e3b39aSJon Chesterfield       auto *F = CGN->getFunction();
409f0e3b39aSJon Chesterfield       if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
410f0e3b39aSJon Chesterfield         continue;
411f0e3b39aSJon Chesterfield       AddressTakenFunctions.insert(CGN);
412f0e3b39aSJon Chesterfield     }
413f0e3b39aSJon Chesterfield   }
414f0e3b39aSJon Chesterfield 
415f0e3b39aSJon Chesterfield   // For given kernel, collect all its reachable non-kernel functions.
collectReachableCallees(Function * K)416f0e3b39aSJon Chesterfield   SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
417f0e3b39aSJon Chesterfield     SmallPtrSet<Function *, 8> ReachableCallees;
418f0e3b39aSJon Chesterfield 
419f0e3b39aSJon Chesterfield     // Call graph node which represents this kernel.
420f0e3b39aSJon Chesterfield     auto *KCGN = CG[K];
421f0e3b39aSJon Chesterfield 
422f0e3b39aSJon Chesterfield     // Go through all call graph nodes reachable from the node representing this
423f0e3b39aSJon Chesterfield     // kernel, visit all their call sites, if the call site is direct, add
424f0e3b39aSJon Chesterfield     // corresponding callee to reachable callee set, if it is indirect, resolve
425f0e3b39aSJon Chesterfield     // the indirect call site to potential reachable callees, add them to
426f0e3b39aSJon Chesterfield     // reachable callee set, and repeat the process for the newly added
427f0e3b39aSJon Chesterfield     // potential callee nodes.
428f0e3b39aSJon Chesterfield     //
429f0e3b39aSJon Chesterfield     // FIXME: Need to handle bit-casted function pointers.
430f0e3b39aSJon Chesterfield     //
4319db0e216SKazu Hirata     SmallVector<CallGraphNode *, 8> CGNStack(depth_first(KCGN));
432f0e3b39aSJon Chesterfield     SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
433f0e3b39aSJon Chesterfield     while (!CGNStack.empty()) {
434f0e3b39aSJon Chesterfield       auto *CGN = CGNStack.pop_back_val();
435f0e3b39aSJon Chesterfield 
436f0e3b39aSJon Chesterfield       if (!VisitedCGNodes.insert(CGN).second)
437f0e3b39aSJon Chesterfield         continue;
438f0e3b39aSJon Chesterfield 
439f0e3b39aSJon Chesterfield       // Ignore call graph node which does not have associated function or
440f0e3b39aSJon Chesterfield       // associated function is not a definition.
441f0e3b39aSJon Chesterfield       if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
442f0e3b39aSJon Chesterfield         continue;
443f0e3b39aSJon Chesterfield 
44467aeae01SKazu Hirata       for (const auto &GI : *CGN) {
4457a47ee51SKazu Hirata         auto *RCB = cast<CallBase>(*GI.first);
44667aeae01SKazu Hirata         auto *RCGN = GI.second;
447f0e3b39aSJon Chesterfield 
448f0e3b39aSJon Chesterfield         if (auto *DCallee = RCGN->getFunction()) {
449f0e3b39aSJon Chesterfield           ReachableCallees.insert(DCallee);
450f0e3b39aSJon Chesterfield         } else if (RCB->isIndirectCall()) {
451f0e3b39aSJon Chesterfield           auto *RCBFTy = RCB->getFunctionType();
452f0e3b39aSJon Chesterfield           for (auto *ACGN : AddressTakenFunctions) {
453f0e3b39aSJon Chesterfield             auto *ACallee = ACGN->getFunction();
454f0e3b39aSJon Chesterfield             if (ACallee->getFunctionType() == RCBFTy) {
455f0e3b39aSJon Chesterfield               ReachableCallees.insert(ACallee);
456f0e3b39aSJon Chesterfield               CGNStack.append(df_begin(ACGN), df_end(ACGN));
457f0e3b39aSJon Chesterfield             }
458f0e3b39aSJon Chesterfield           }
459f0e3b39aSJon Chesterfield         }
460f0e3b39aSJon Chesterfield       }
461f0e3b39aSJon Chesterfield     }
462f0e3b39aSJon Chesterfield 
463f0e3b39aSJon Chesterfield     return ReachableCallees;
464f0e3b39aSJon Chesterfield   }
465f0e3b39aSJon Chesterfield 
466f0e3b39aSJon Chesterfield public:
CollectReachableCallees(Module & M)467f0e3b39aSJon Chesterfield   explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
468f0e3b39aSJon Chesterfield     // Collect address taken functions.
469f0e3b39aSJon Chesterfield     collectAddressTakenFunctions();
470f0e3b39aSJon Chesterfield   }
471f0e3b39aSJon Chesterfield 
collectReachableCallees(DenseMap<Function *,SmallPtrSet<Function *,8>> & KernelToCallees)472f0e3b39aSJon Chesterfield   void collectReachableCallees(
473f0e3b39aSJon Chesterfield       DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
474f0e3b39aSJon Chesterfield     // Collect reachable callee set for each kernel defined in the module.
475f0e3b39aSJon Chesterfield     for (Function &F : M.functions()) {
476f0e3b39aSJon Chesterfield       if (!llvm::AMDGPU::isKernelCC(&F))
477f0e3b39aSJon Chesterfield         continue;
478f0e3b39aSJon Chesterfield       Function *K = &F;
479f0e3b39aSJon Chesterfield       KernelToCallees[K] = collectReachableCallees(K);
480f0e3b39aSJon Chesterfield     }
481f0e3b39aSJon Chesterfield   }
482f0e3b39aSJon Chesterfield };
483f0e3b39aSJon Chesterfield 
484f0e3b39aSJon Chesterfield /// Collect reachable callees for each kernel defined in the module \p M and
485f0e3b39aSJon Chesterfield /// return collected callees at \p KernelToCallees.
collectReachableCallees(Module & M,DenseMap<Function *,SmallPtrSet<Function *,8>> & KernelToCallees)486f0e3b39aSJon Chesterfield void collectReachableCallees(
487f0e3b39aSJon Chesterfield     Module &M,
488f0e3b39aSJon Chesterfield     DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
489f0e3b39aSJon Chesterfield   CollectReachableCallees CRC{M};
490f0e3b39aSJon Chesterfield   CRC.collectReachableCallees(KernelToCallees);
491f0e3b39aSJon Chesterfield }
492f0e3b39aSJon Chesterfield 
493f0e3b39aSJon Chesterfield /// For the given LDS global \p GV, visit all its users and collect all
494f0e3b39aSJon Chesterfield /// non-kernel functions within which \p GV is used and return collected list of
495f0e3b39aSJon Chesterfield /// such non-kernel functions.
collectNonKernelAccessorsOfLDS(GlobalVariable * GV)496f0e3b39aSJon Chesterfield SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
497f0e3b39aSJon Chesterfield   SmallPtrSet<Function *, 8> LDSAccessors;
498f0e3b39aSJon Chesterfield   SmallVector<User *, 8> UserStack(GV->users());
499f0e3b39aSJon Chesterfield   SmallPtrSet<User *, 8> VisitedUsers;
500f0e3b39aSJon Chesterfield 
501f0e3b39aSJon Chesterfield   while (!UserStack.empty()) {
502f0e3b39aSJon Chesterfield     auto *U = UserStack.pop_back_val();
503f0e3b39aSJon Chesterfield 
504f0e3b39aSJon Chesterfield     // `U` is already visited? continue to next one.
505f0e3b39aSJon Chesterfield     if (!VisitedUsers.insert(U).second)
506f0e3b39aSJon Chesterfield       continue;
507f0e3b39aSJon Chesterfield 
508f0e3b39aSJon Chesterfield     // `U` is a global variable which is initialized with LDS. Ignore LDS.
509f0e3b39aSJon Chesterfield     if (isa<GlobalValue>(U))
510f0e3b39aSJon Chesterfield       return SmallPtrSet<Function *, 8>();
511f0e3b39aSJon Chesterfield 
512f0e3b39aSJon Chesterfield     // Recursively explore constant users.
513f0e3b39aSJon Chesterfield     if (isa<Constant>(U)) {
514f0e3b39aSJon Chesterfield       append_range(UserStack, U->users());
515f0e3b39aSJon Chesterfield       continue;
516f0e3b39aSJon Chesterfield     }
517f0e3b39aSJon Chesterfield 
518f0e3b39aSJon Chesterfield     // `U` should be an instruction, if it belongs to a non-kernel function F,
519f0e3b39aSJon Chesterfield     // then collect F.
520f0e3b39aSJon Chesterfield     Function *F = cast<Instruction>(U)->getFunction();
521f0e3b39aSJon Chesterfield     if (!llvm::AMDGPU::isKernelCC(F))
522f0e3b39aSJon Chesterfield       LDSAccessors.insert(F);
523f0e3b39aSJon Chesterfield   }
524f0e3b39aSJon Chesterfield 
525f0e3b39aSJon Chesterfield   return LDSAccessors;
526f0e3b39aSJon Chesterfield }
527f0e3b39aSJon Chesterfield 
528f0e3b39aSJon Chesterfield DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
getFunctionToInstsMap(User * U,bool CollectKernelInsts)529f0e3b39aSJon Chesterfield getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
530f0e3b39aSJon Chesterfield   DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
531f0e3b39aSJon Chesterfield   SmallVector<User *, 8> UserStack;
532f0e3b39aSJon Chesterfield   SmallPtrSet<User *, 8> VisitedUsers;
533f0e3b39aSJon Chesterfield 
534f0e3b39aSJon Chesterfield   UserStack.push_back(U);
535f0e3b39aSJon Chesterfield 
536f0e3b39aSJon Chesterfield   while (!UserStack.empty()) {
537f0e3b39aSJon Chesterfield     auto *UU = UserStack.pop_back_val();
538f0e3b39aSJon Chesterfield 
539f0e3b39aSJon Chesterfield     if (!VisitedUsers.insert(UU).second)
540f0e3b39aSJon Chesterfield       continue;
541f0e3b39aSJon Chesterfield 
542f0e3b39aSJon Chesterfield     if (isa<GlobalValue>(UU))
543f0e3b39aSJon Chesterfield       continue;
544f0e3b39aSJon Chesterfield 
545f0e3b39aSJon Chesterfield     if (isa<Constant>(UU)) {
546f0e3b39aSJon Chesterfield       append_range(UserStack, UU->users());
547f0e3b39aSJon Chesterfield       continue;
548f0e3b39aSJon Chesterfield     }
549f0e3b39aSJon Chesterfield 
550f0e3b39aSJon Chesterfield     auto *I = cast<Instruction>(UU);
551f0e3b39aSJon Chesterfield     Function *F = I->getFunction();
552f0e3b39aSJon Chesterfield     if (CollectKernelInsts) {
553f0e3b39aSJon Chesterfield       if (!llvm::AMDGPU::isKernelCC(F)) {
554f0e3b39aSJon Chesterfield         continue;
555f0e3b39aSJon Chesterfield       }
556f0e3b39aSJon Chesterfield     } else {
557f0e3b39aSJon Chesterfield       if (llvm::AMDGPU::isKernelCC(F)) {
558f0e3b39aSJon Chesterfield         continue;
559f0e3b39aSJon Chesterfield       }
560f0e3b39aSJon Chesterfield     }
561f0e3b39aSJon Chesterfield 
562f0e3b39aSJon Chesterfield     FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
563f0e3b39aSJon Chesterfield     FunctionToInsts[F].insert(I);
564f0e3b39aSJon Chesterfield   }
565f0e3b39aSJon Chesterfield 
566f0e3b39aSJon Chesterfield   return FunctionToInsts;
567f0e3b39aSJon Chesterfield }
568f0e3b39aSJon Chesterfield 
569f0e3b39aSJon Chesterfield } // namespace AMDGPU
570f0e3b39aSJon Chesterfield 
57180fd5fa5Shsmahesha // Entry-point function which interface ReplaceLDSUseImpl with outside of the
57280fd5fa5Shsmahesha // class.
replaceLDSUse()57380fd5fa5Shsmahesha bool ReplaceLDSUseImpl::replaceLDSUse() {
57480fd5fa5Shsmahesha   // Collect LDS which requires their uses to be replaced by pointer.
57580fd5fa5Shsmahesha   std::vector<GlobalVariable *> LDSGlobals =
57680fd5fa5Shsmahesha       collectLDSRequiringPointerReplace();
57780fd5fa5Shsmahesha 
57880fd5fa5Shsmahesha   // No LDS to pointer-replace. Nothing to do.
57980fd5fa5Shsmahesha   if (LDSGlobals.empty())
58080fd5fa5Shsmahesha     return false;
58180fd5fa5Shsmahesha 
58280fd5fa5Shsmahesha   // Collect reachable callee set for each kernel defined in the module.
58380fd5fa5Shsmahesha   AMDGPU::collectReachableCallees(M, KernelToCallees);
58480fd5fa5Shsmahesha 
58580fd5fa5Shsmahesha   if (KernelToCallees.empty()) {
58680fd5fa5Shsmahesha     // Either module does not have any kernel definitions, or none of the kernel
58780fd5fa5Shsmahesha     // has a call to non-kernel functions, or we could not resolve any of the
58880fd5fa5Shsmahesha     // call sites to proper non-kernel functions, because of the situations like
58980fd5fa5Shsmahesha     // inline asm calls. Nothing to replace.
59080fd5fa5Shsmahesha     return false;
59180fd5fa5Shsmahesha   }
59280fd5fa5Shsmahesha 
59380fd5fa5Shsmahesha   // For every LDS from collected LDS globals set, replace its non-kernel
59480fd5fa5Shsmahesha   // function scope use by pointer.
59580fd5fa5Shsmahesha   bool Changed = false;
59680fd5fa5Shsmahesha   for (auto *GV : LDSGlobals)
59780fd5fa5Shsmahesha     Changed |= replaceLDSUse(GV);
59880fd5fa5Shsmahesha 
59980fd5fa5Shsmahesha   return Changed;
60080fd5fa5Shsmahesha }
60180fd5fa5Shsmahesha 
60280fd5fa5Shsmahesha class AMDGPUReplaceLDSUseWithPointer : public ModulePass {
60380fd5fa5Shsmahesha public:
60480fd5fa5Shsmahesha   static char ID;
60580fd5fa5Shsmahesha 
AMDGPUReplaceLDSUseWithPointer()60680fd5fa5Shsmahesha   AMDGPUReplaceLDSUseWithPointer() : ModulePass(ID) {
60780fd5fa5Shsmahesha     initializeAMDGPUReplaceLDSUseWithPointerPass(
60880fd5fa5Shsmahesha         *PassRegistry::getPassRegistry());
60980fd5fa5Shsmahesha   }
61080fd5fa5Shsmahesha 
61180fd5fa5Shsmahesha   bool runOnModule(Module &M) override;
61280fd5fa5Shsmahesha 
getAnalysisUsage(AnalysisUsage & AU) const61380fd5fa5Shsmahesha   void getAnalysisUsage(AnalysisUsage &AU) const override {
61480fd5fa5Shsmahesha     AU.addRequired<TargetPassConfig>();
61580fd5fa5Shsmahesha   }
61680fd5fa5Shsmahesha };
61780fd5fa5Shsmahesha 
61880fd5fa5Shsmahesha } // namespace
61980fd5fa5Shsmahesha 
62080fd5fa5Shsmahesha char AMDGPUReplaceLDSUseWithPointer::ID = 0;
62180fd5fa5Shsmahesha char &llvm::AMDGPUReplaceLDSUseWithPointerID =
62280fd5fa5Shsmahesha     AMDGPUReplaceLDSUseWithPointer::ID;
62380fd5fa5Shsmahesha 
62480fd5fa5Shsmahesha INITIALIZE_PASS_BEGIN(
62580fd5fa5Shsmahesha     AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
62680fd5fa5Shsmahesha     "Replace within non-kernel function use of LDS with pointer",
62780fd5fa5Shsmahesha     false /*only look at the cfg*/, false /*analysis pass*/)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)62880fd5fa5Shsmahesha INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
62980fd5fa5Shsmahesha INITIALIZE_PASS_END(
63080fd5fa5Shsmahesha     AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
63180fd5fa5Shsmahesha     "Replace within non-kernel function use of LDS with pointer",
63280fd5fa5Shsmahesha     false /*only look at the cfg*/, false /*analysis pass*/)
63380fd5fa5Shsmahesha 
63480fd5fa5Shsmahesha bool AMDGPUReplaceLDSUseWithPointer::runOnModule(Module &M) {
63580fd5fa5Shsmahesha   ReplaceLDSUseImpl LDSUseReplacer{M};
63680fd5fa5Shsmahesha   return LDSUseReplacer.replaceLDSUse();
63780fd5fa5Shsmahesha }
63880fd5fa5Shsmahesha 
createAMDGPUReplaceLDSUseWithPointerPass()63980fd5fa5Shsmahesha ModulePass *llvm::createAMDGPUReplaceLDSUseWithPointerPass() {
64080fd5fa5Shsmahesha   return new AMDGPUReplaceLDSUseWithPointer();
64180fd5fa5Shsmahesha }
64280fd5fa5Shsmahesha 
64380fd5fa5Shsmahesha PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)64480fd5fa5Shsmahesha AMDGPUReplaceLDSUseWithPointerPass::run(Module &M, ModuleAnalysisManager &AM) {
64580fd5fa5Shsmahesha   ReplaceLDSUseImpl LDSUseReplacer{M};
64680fd5fa5Shsmahesha   LDSUseReplacer.replaceLDSUse();
64780fd5fa5Shsmahesha   return PreservedAnalyses::all();
64880fd5fa5Shsmahesha }
649