//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass replaces uses of function aliases with the aliased function and
/// marks functions as always_inline: non-entry functions that (transitively)
/// use LDS or region address space globals, and, when function calls are
/// disabled, every defined function that has uses. With
/// -amdgpu-stress-function-calls the remaining functions are instead marked
/// noinline.
//
//===----------------------------------------------------------------------===//
1445bb48eaSTom Stellard 
1545bb48eaSTom Stellard #include "AMDGPU.h"
16a680199aSMatt Arsenault #include "AMDGPUTargetMachine.h"
17a680199aSMatt Arsenault #include "Utils/AMDGPUBaseInfo.h"
1845bb48eaSTom Stellard #include "llvm/IR/Module.h"
196a87e9b0Sdfukalov #include "llvm/Pass.h"
206a87e9b0Sdfukalov #include "llvm/Support/CommandLine.h"
2145bb48eaSTom Stellard 
2245bb48eaSTom Stellard using namespace llvm;
2345bb48eaSTom Stellard 
2445bb48eaSTom Stellard namespace {
2545bb48eaSTom Stellard 
261390af2dSMatt Arsenault static cl::opt<bool> StressCalls(
271390af2dSMatt Arsenault   "amdgpu-stress-function-calls",
281390af2dSMatt Arsenault   cl::Hidden,
291390af2dSMatt Arsenault   cl::desc("Force all functions to be noinline"),
301390af2dSMatt Arsenault   cl::init(false));
311390af2dSMatt Arsenault 
3245bb48eaSTom Stellard class AMDGPUAlwaysInline : public ModulePass {
3389653dfdSStanislav Mekhanoshin   bool GlobalOpt;
3489653dfdSStanislav Mekhanoshin 
3545bb48eaSTom Stellard public:
36746e0657SMatt Arsenault   static char ID;
37746e0657SMatt Arsenault 
38746e0657SMatt Arsenault   AMDGPUAlwaysInline(bool GlobalOpt = false) :
39746e0657SMatt Arsenault     ModulePass(ID), GlobalOpt(GlobalOpt) { }
4045bb48eaSTom Stellard   bool runOnModule(Module &M) override;
41a680199aSMatt Arsenault 
42a680199aSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
43a680199aSMatt Arsenault     AU.setPreservesAll();
44a680199aSMatt Arsenault  }
4545bb48eaSTom Stellard };
4645bb48eaSTom Stellard 
4745bb48eaSTom Stellard } // End anonymous namespace
4845bb48eaSTom Stellard 
49746e0657SMatt Arsenault INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
50746e0657SMatt Arsenault                 "AMDGPU Inline All Functions", false, false)
51746e0657SMatt Arsenault 
5245bb48eaSTom Stellard char AMDGPUAlwaysInline::ID = 0;
5345bb48eaSTom Stellard 
544e838ba9SArthur Eubanks static void
554e838ba9SArthur Eubanks recursivelyVisitUsers(GlobalValue &GV,
56a680199aSMatt Arsenault                       SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
5719aacdb7SKazu Hirata   SmallVector<User *, 16> Stack(GV.users());
58a680199aSMatt Arsenault 
59a680199aSMatt Arsenault   SmallPtrSet<const Value *, 8> Visited;
60a680199aSMatt Arsenault 
61a680199aSMatt Arsenault   while (!Stack.empty()) {
62a680199aSMatt Arsenault     User *U = Stack.pop_back_val();
63a680199aSMatt Arsenault     if (!Visited.insert(U).second)
64a680199aSMatt Arsenault       continue;
65a680199aSMatt Arsenault 
66a680199aSMatt Arsenault     if (Instruction *I = dyn_cast<Instruction>(U)) {
67a680199aSMatt Arsenault       Function *F = I->getParent()->getParent();
68a680199aSMatt Arsenault       if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
69c3d3c22aSMatt Arsenault         // FIXME: This is a horrible hack. We should always respect noinline,
70c3d3c22aSMatt Arsenault         // and just let us hit the error when we can't handle this.
71c3d3c22aSMatt Arsenault         //
72c3d3c22aSMatt Arsenault         // Unfortunately, clang adds noinline to all functions at -O0. We have
73*dc6e8dfdSJacob Lambert         // to override this here until that's fixed.
74c3d3c22aSMatt Arsenault         F->removeFnAttr(Attribute::NoInline);
75c3d3c22aSMatt Arsenault 
76a680199aSMatt Arsenault         FuncsToAlwaysInline.insert(F);
77a680199aSMatt Arsenault         Stack.push_back(F);
78a680199aSMatt Arsenault       }
79a680199aSMatt Arsenault 
80a680199aSMatt Arsenault       // No need to look at further users, but we do need to inline any callers.
81a680199aSMatt Arsenault       continue;
82a680199aSMatt Arsenault     }
83a680199aSMatt Arsenault 
8405444417SKazu Hirata     append_range(Stack, U->users());
85a680199aSMatt Arsenault   }
86a680199aSMatt Arsenault }
87a680199aSMatt Arsenault 
884e838ba9SArthur Eubanks static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
89eba80895SNikolay Haustov   std::vector<GlobalAlias*> AliasesToRemove;
90a680199aSMatt Arsenault 
91a680199aSMatt Arsenault   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
92a680199aSMatt Arsenault   SmallPtrSet<Function *, 8> FuncsToNoInline;
93ca95d441SMatt Arsenault 
94eba80895SNikolay Haustov   for (GlobalAlias &A : M.aliases()) {
95eba80895SNikolay Haustov     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
96eba80895SNikolay Haustov       A.replaceAllUsesWith(F);
97eba80895SNikolay Haustov       AliasesToRemove.push_back(&A);
98eba80895SNikolay Haustov     }
99a680199aSMatt Arsenault 
100a680199aSMatt Arsenault     // FIXME: If the aliasee isn't a function, it's some kind of constant expr
101a680199aSMatt Arsenault     // cast that won't be inlined through.
102eba80895SNikolay Haustov   }
103eba80895SNikolay Haustov 
10489653dfdSStanislav Mekhanoshin   if (GlobalOpt) {
105eba80895SNikolay Haustov     for (GlobalAlias* A : AliasesToRemove) {
106eba80895SNikolay Haustov       A->eraseFromParent();
107eba80895SNikolay Haustov     }
10889653dfdSStanislav Mekhanoshin   }
109eba80895SNikolay Haustov 
110a680199aSMatt Arsenault   // Always force inlining of any function that uses an LDS global address. This
111a680199aSMatt Arsenault   // is something of a workaround because we don't have a way of supporting LDS
112a680199aSMatt Arsenault   // objects defined in functions. LDS is always allocated by a kernel, and it
113a680199aSMatt Arsenault   // is difficult to manage LDS usage if a function may be used by multiple
114a680199aSMatt Arsenault   // kernels.
115a680199aSMatt Arsenault   //
116a680199aSMatt Arsenault   // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
117a680199aSMatt Arsenault   // should only appear when IPO passes manages to move LDs defined in a kernel
118a680199aSMatt Arsenault   // into a single user function.
119a680199aSMatt Arsenault 
120a680199aSMatt Arsenault   for (GlobalVariable &GV : M.globals()) {
121a680199aSMatt Arsenault     // TODO: Region address
122bc561662SMatt Arsenault     unsigned AS = GV.getAddressSpace();
1234973b0c4Shsmahesha     if ((AS == AMDGPUAS::REGION_ADDRESS) ||
1244973b0c4Shsmahesha         (AS == AMDGPUAS::LOCAL_ADDRESS &&
125106959acSVang Thao          (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
126a680199aSMatt Arsenault       recursivelyVisitUsers(GV, FuncsToAlwaysInline);
127a680199aSMatt Arsenault   }
128a680199aSMatt Arsenault 
129a680199aSMatt Arsenault   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
1301390af2dSMatt Arsenault     auto IncompatAttr
1311390af2dSMatt Arsenault       = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
1321390af2dSMatt Arsenault 
133ca95d441SMatt Arsenault     for (Function &F : M) {
134a680199aSMatt Arsenault       if (!F.isDeclaration() && !F.use_empty() &&
135a680199aSMatt Arsenault           !F.hasFnAttribute(IncompatAttr)) {
136a680199aSMatt Arsenault         if (StressCalls) {
137a680199aSMatt Arsenault           if (!FuncsToAlwaysInline.count(&F))
138a680199aSMatt Arsenault             FuncsToNoInline.insert(&F);
139a680199aSMatt Arsenault         } else
140a680199aSMatt Arsenault           FuncsToAlwaysInline.insert(&F);
141a680199aSMatt Arsenault       }
142a680199aSMatt Arsenault     }
14345bb48eaSTom Stellard   }
14445bb48eaSTom Stellard 
145a680199aSMatt Arsenault   for (Function *F : FuncsToAlwaysInline)
146a680199aSMatt Arsenault     F->addFnAttr(Attribute::AlwaysInline);
14745bb48eaSTom Stellard 
148a680199aSMatt Arsenault   for (Function *F : FuncsToNoInline)
149a680199aSMatt Arsenault     F->addFnAttr(Attribute::NoInline);
150a680199aSMatt Arsenault 
151a680199aSMatt Arsenault   return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
15245bb48eaSTom Stellard }
15345bb48eaSTom Stellard 
1544e838ba9SArthur Eubanks bool AMDGPUAlwaysInline::runOnModule(Module &M) {
1554e838ba9SArthur Eubanks   return alwaysInlineImpl(M, GlobalOpt);
1564e838ba9SArthur Eubanks }
1574e838ba9SArthur Eubanks 
15889653dfdSStanislav Mekhanoshin ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
15989653dfdSStanislav Mekhanoshin   return new AMDGPUAlwaysInline(GlobalOpt);
16045bb48eaSTom Stellard }
161a680199aSMatt Arsenault 
1624e838ba9SArthur Eubanks PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
1634e838ba9SArthur Eubanks                                               ModuleAnalysisManager &AM) {
1644e838ba9SArthur Eubanks   alwaysInlineImpl(M, GlobalOpt);
1654e838ba9SArthur Eubanks   return PreservedAnalyses::all();
1664e838ba9SArthur Eubanks }
167