//===-- AMDGPUAlwaysInlinePass.cpp - Force inlining of functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds the always_inline attribute to functions that must be
/// inlined: non-entry functions that use LDS or region-address-space globals
/// and, when function calls are disabled, every defined function that has
/// uses. With -amdgpu-stress-function-calls it instead marks the remaining
/// functions noinline. Function aliases are replaced by their aliasees first.
//
//===----------------------------------------------------------------------===//
1445bb48eaSTom Stellard 
1545bb48eaSTom Stellard #include "AMDGPU.h"
16a680199aSMatt Arsenault #include "AMDGPUTargetMachine.h"
17a680199aSMatt Arsenault #include "Utils/AMDGPUBaseInfo.h"
18*0567f033SAnshil Gandhi #include "llvm/CodeGen/CommandFlags.h"
1945bb48eaSTom Stellard #include "llvm/IR/Module.h"
206a87e9b0Sdfukalov #include "llvm/Pass.h"
216a87e9b0Sdfukalov #include "llvm/Support/CommandLine.h"
2245bb48eaSTom Stellard 
2345bb48eaSTom Stellard using namespace llvm;
2445bb48eaSTom Stellard 
2545bb48eaSTom Stellard namespace {
2645bb48eaSTom Stellard 
271390af2dSMatt Arsenault static cl::opt<bool> StressCalls(
281390af2dSMatt Arsenault   "amdgpu-stress-function-calls",
291390af2dSMatt Arsenault   cl::Hidden,
301390af2dSMatt Arsenault   cl::desc("Force all functions to be noinline"),
311390af2dSMatt Arsenault   cl::init(false));
321390af2dSMatt Arsenault 
3345bb48eaSTom Stellard class AMDGPUAlwaysInline : public ModulePass {
3489653dfdSStanislav Mekhanoshin   bool GlobalOpt;
3589653dfdSStanislav Mekhanoshin 
3645bb48eaSTom Stellard public:
37746e0657SMatt Arsenault   static char ID;
38746e0657SMatt Arsenault 
AMDGPUAlwaysInline(bool GlobalOpt=false)39746e0657SMatt Arsenault   AMDGPUAlwaysInline(bool GlobalOpt = false) :
40746e0657SMatt Arsenault     ModulePass(ID), GlobalOpt(GlobalOpt) { }
4145bb48eaSTom Stellard   bool runOnModule(Module &M) override;
42a680199aSMatt Arsenault 
getAnalysisUsage(AnalysisUsage & AU) const43a680199aSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
44a680199aSMatt Arsenault     AU.setPreservesAll();
45a680199aSMatt Arsenault  }
4645bb48eaSTom Stellard };
4745bb48eaSTom Stellard 
4845bb48eaSTom Stellard } // End anonymous namespace
4945bb48eaSTom Stellard 
50746e0657SMatt Arsenault INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
51746e0657SMatt Arsenault                 "AMDGPU Inline All Functions", false, false)
52746e0657SMatt Arsenault 
5345bb48eaSTom Stellard char AMDGPUAlwaysInline::ID = 0;
5445bb48eaSTom Stellard 
554e838ba9SArthur Eubanks static void
recursivelyVisitUsers(GlobalValue & GV,SmallPtrSetImpl<Function * > & FuncsToAlwaysInline)564e838ba9SArthur Eubanks recursivelyVisitUsers(GlobalValue &GV,
57a680199aSMatt Arsenault                       SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
5819aacdb7SKazu Hirata   SmallVector<User *, 16> Stack(GV.users());
59a680199aSMatt Arsenault 
60a680199aSMatt Arsenault   SmallPtrSet<const Value *, 8> Visited;
61a680199aSMatt Arsenault 
62a680199aSMatt Arsenault   while (!Stack.empty()) {
63a680199aSMatt Arsenault     User *U = Stack.pop_back_val();
64a680199aSMatt Arsenault     if (!Visited.insert(U).second)
65a680199aSMatt Arsenault       continue;
66a680199aSMatt Arsenault 
67a680199aSMatt Arsenault     if (Instruction *I = dyn_cast<Instruction>(U)) {
68a680199aSMatt Arsenault       Function *F = I->getParent()->getParent();
69a680199aSMatt Arsenault       if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
70c3d3c22aSMatt Arsenault         // FIXME: This is a horrible hack. We should always respect noinline,
71c3d3c22aSMatt Arsenault         // and just let us hit the error when we can't handle this.
72c3d3c22aSMatt Arsenault         //
73c3d3c22aSMatt Arsenault         // Unfortunately, clang adds noinline to all functions at -O0. We have
74dc6e8dfdSJacob Lambert         // to override this here until that's fixed.
75c3d3c22aSMatt Arsenault         F->removeFnAttr(Attribute::NoInline);
76c3d3c22aSMatt Arsenault 
77a680199aSMatt Arsenault         FuncsToAlwaysInline.insert(F);
78a680199aSMatt Arsenault         Stack.push_back(F);
79a680199aSMatt Arsenault       }
80a680199aSMatt Arsenault 
81a680199aSMatt Arsenault       // No need to look at further users, but we do need to inline any callers.
82a680199aSMatt Arsenault       continue;
83a680199aSMatt Arsenault     }
84a680199aSMatt Arsenault 
8505444417SKazu Hirata     append_range(Stack, U->users());
86a680199aSMatt Arsenault   }
87a680199aSMatt Arsenault }
88a680199aSMatt Arsenault 
alwaysInlineImpl(Module & M,bool GlobalOpt)894e838ba9SArthur Eubanks static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
90eba80895SNikolay Haustov   std::vector<GlobalAlias*> AliasesToRemove;
91a680199aSMatt Arsenault 
92a680199aSMatt Arsenault   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
93a680199aSMatt Arsenault   SmallPtrSet<Function *, 8> FuncsToNoInline;
94*0567f033SAnshil Gandhi   Triple TT(M.getTargetTriple());
95ca95d441SMatt Arsenault 
96eba80895SNikolay Haustov   for (GlobalAlias &A : M.aliases()) {
97eba80895SNikolay Haustov     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
98*0567f033SAnshil Gandhi       if (TT.getArch() == Triple::amdgcn &&
99*0567f033SAnshil Gandhi           A.getLinkage() != GlobalValue::InternalLinkage)
100*0567f033SAnshil Gandhi         continue;
101eba80895SNikolay Haustov       A.replaceAllUsesWith(F);
102eba80895SNikolay Haustov       AliasesToRemove.push_back(&A);
103eba80895SNikolay Haustov     }
104a680199aSMatt Arsenault 
105a680199aSMatt Arsenault     // FIXME: If the aliasee isn't a function, it's some kind of constant expr
106a680199aSMatt Arsenault     // cast that won't be inlined through.
107eba80895SNikolay Haustov   }
108eba80895SNikolay Haustov 
10989653dfdSStanislav Mekhanoshin   if (GlobalOpt) {
110eba80895SNikolay Haustov     for (GlobalAlias* A : AliasesToRemove) {
111eba80895SNikolay Haustov       A->eraseFromParent();
112eba80895SNikolay Haustov     }
11389653dfdSStanislav Mekhanoshin   }
114eba80895SNikolay Haustov 
115a680199aSMatt Arsenault   // Always force inlining of any function that uses an LDS global address. This
116a680199aSMatt Arsenault   // is something of a workaround because we don't have a way of supporting LDS
117a680199aSMatt Arsenault   // objects defined in functions. LDS is always allocated by a kernel, and it
118a680199aSMatt Arsenault   // is difficult to manage LDS usage if a function may be used by multiple
119a680199aSMatt Arsenault   // kernels.
120a680199aSMatt Arsenault   //
121a680199aSMatt Arsenault   // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
122a680199aSMatt Arsenault   // should only appear when IPO passes manages to move LDs defined in a kernel
123a680199aSMatt Arsenault   // into a single user function.
124a680199aSMatt Arsenault 
125a680199aSMatt Arsenault   for (GlobalVariable &GV : M.globals()) {
126a680199aSMatt Arsenault     // TODO: Region address
127bc561662SMatt Arsenault     unsigned AS = GV.getAddressSpace();
1284973b0c4Shsmahesha     if ((AS == AMDGPUAS::REGION_ADDRESS) ||
1294973b0c4Shsmahesha         (AS == AMDGPUAS::LOCAL_ADDRESS &&
130106959acSVang Thao          (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
131a680199aSMatt Arsenault       recursivelyVisitUsers(GV, FuncsToAlwaysInline);
132a680199aSMatt Arsenault   }
133a680199aSMatt Arsenault 
134a680199aSMatt Arsenault   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
1351390af2dSMatt Arsenault     auto IncompatAttr
1361390af2dSMatt Arsenault       = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
1371390af2dSMatt Arsenault 
138ca95d441SMatt Arsenault     for (Function &F : M) {
139a680199aSMatt Arsenault       if (!F.isDeclaration() && !F.use_empty() &&
140a680199aSMatt Arsenault           !F.hasFnAttribute(IncompatAttr)) {
141a680199aSMatt Arsenault         if (StressCalls) {
142a680199aSMatt Arsenault           if (!FuncsToAlwaysInline.count(&F))
143a680199aSMatt Arsenault             FuncsToNoInline.insert(&F);
144a680199aSMatt Arsenault         } else
145a680199aSMatt Arsenault           FuncsToAlwaysInline.insert(&F);
146a680199aSMatt Arsenault       }
147a680199aSMatt Arsenault     }
14845bb48eaSTom Stellard   }
14945bb48eaSTom Stellard 
150a680199aSMatt Arsenault   for (Function *F : FuncsToAlwaysInline)
151a680199aSMatt Arsenault     F->addFnAttr(Attribute::AlwaysInline);
15245bb48eaSTom Stellard 
153a680199aSMatt Arsenault   for (Function *F : FuncsToNoInline)
154a680199aSMatt Arsenault     F->addFnAttr(Attribute::NoInline);
155a680199aSMatt Arsenault 
156a680199aSMatt Arsenault   return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
15745bb48eaSTom Stellard }
15845bb48eaSTom Stellard 
runOnModule(Module & M)1594e838ba9SArthur Eubanks bool AMDGPUAlwaysInline::runOnModule(Module &M) {
1604e838ba9SArthur Eubanks   return alwaysInlineImpl(M, GlobalOpt);
1614e838ba9SArthur Eubanks }
1624e838ba9SArthur Eubanks 
createAMDGPUAlwaysInlinePass(bool GlobalOpt)16389653dfdSStanislav Mekhanoshin ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
16489653dfdSStanislav Mekhanoshin   return new AMDGPUAlwaysInline(GlobalOpt);
16545bb48eaSTom Stellard }
166a680199aSMatt Arsenault 
run(Module & M,ModuleAnalysisManager & AM)1674e838ba9SArthur Eubanks PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
1684e838ba9SArthur Eubanks                                               ModuleAnalysisManager &AM) {
1694e838ba9SArthur Eubanks   alwaysInlineImpl(M, GlobalOpt);
1704e838ba9SArthur Eubanks   return PreservedAnalyses::all();
1714e838ba9SArthur Eubanks }
172