//===-- AMDGPUAlwaysInlinePass.cpp - Inline All Functions -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass forces inlining of any function that references an LDS global
/// and, unless native function calls are enabled for the target, marks every
/// other used function definition as always_inline (or as noinline when call
/// stress-testing is requested) so that later inliner passes flatten them.
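///
/// The actual inlining is performed later by the generic AlwaysInliner (or by
/// any inliner that honors the attribute); the AMDGPU target constructs this
/// pass through createAMDGPUAlwaysInlinePass().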
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

namespace {

static cl::opt<bool> StressCalls(
  "amdgpu-stress-function-calls",
  cl::Hidden,
  cl::desc("Force all functions to be noinline"),
  cl::init(false));
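
// As a hypothetical example (the exact driver invocation may vary), call
// stress-testing can be exercised with something like:
//   llc -march=amdgcn -amdgpu-stress-function-calls foo.ll
// which keeps every function that is not required to be inlined out-of-line.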

class AMDGPUAlwaysInline : public ModulePass {
  bool GlobalOpt;

  void recursivelyVisitUsers(GlobalValue &GV,
                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
public:
  static char ID;

  AMDGPUAlwaysInline(bool GlobalOpt = false) :
    ModulePass(ID), GlobalOpt(GlobalOpt) { }
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                "AMDGPU Inline All Functions", false, false)

char AMDGPUAlwaysInline::ID = 0;

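// Walk the transitive users of GV and collect every non-entry function that
// reaches it, together with the non-entry functions that in turn use those,
// so the whole chain can be forced inline into the kernels that ultimately
// reference the global.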
void AMDGPUAlwaysInline::recursivelyVisitUsers(
  GlobalValue &GV,
  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallVector<User *, 16> Stack;

  SmallPtrSet<const Value *, 8> Visited;

  for (User *U : GV.users())
    Stack.push_back(U);

  while (!Stack.empty()) {
    User *U = Stack.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (Instruction *I = dyn_cast<Instruction>(U)) {
      Function *F = I->getParent()->getParent();
      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
        FuncsToAlwaysInline.insert(F);
        Stack.push_back(F);
      }

      // No need to look at further users, but we do need to inline any callers.
      continue;
    }

    for (User *UU : U->users())
      Stack.push_back(UU);
  }
}

bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

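  // The now-dead aliases are only erased when the pass was created with
  // GlobalOpt set; otherwise they are left in the module after being bypassed
  // above.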
  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.
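  //
  // A minimal sketch of the pattern this handles (the IR names here are
  // hypothetical):
  //
  //   @lds = internal addrspace(3) global [64 x i32] undef
  //
  //   define void @use_lds() {          ; non-kernel function touching LDS
  //     ...
  //   }
  //
  //   define amdgpu_kernel void @kern() {
  //     call void @use_lds()
  //     ret void
  //   }
  //
  // Here @use_lds receives the alwaysinline attribute so that a later
  // AlwaysInliner run folds it into @kern, where the LDS allocation can be
  // accounted for.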

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}

ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  return new AMDGPUAlwaysInline(GlobalOpt);
}
