//===-- AMDGPUAlwaysInlinePass.cpp - Inline all functions -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass marks all internal functions as always_inline and creates
/// duplicates of all other functions and marks the duplicates as always_inline.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

namespace {

static cl::opt<bool> StressCalls(
  "amdgpu-stress-function-calls",
  cl::Hidden,
  cl::desc("Force all functions to be noinline"),
  cl::init(false));

class AMDGPUAlwaysInline : public ModulePass {
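  // When set, function aliases that are redirected to their aliasee in
  // runOnModule are also erased from the module.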
  bool GlobalOpt;

  void recursivelyVisitUsers(GlobalValue &GV,
                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
public:
  static char ID;

  AMDGPUAlwaysInline(bool GlobalOpt = false) :
    ModulePass(ID), GlobalOpt(GlobalOpt) { }
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                "AMDGPU Inline All Functions", false, false)

char AMDGPUAlwaysInline::ID = 0;

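// Collect in FuncsToAlwaysInline every non-entry function that transitively
// uses GV, whether through instructions or constant expressions, together
// with the non-entry callers of any such function.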
void AMDGPUAlwaysInline::recursivelyVisitUsers(
  GlobalValue &GV,
  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallVector<User *, 16> Stack;

  SmallPtrSet<const Value *, 8> Visited;

  for (User *U : GV.users())
    Stack.push_back(U);

  while (!Stack.empty()) {
    User *U = Stack.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (Instruction *I = dyn_cast<Instruction>(U)) {
      Function *F = I->getParent()->getParent();
      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
        FuncsToAlwaysInline.insert(F);
        Stack.push_back(F);
      }

      // No need to look at further users, but we do need to inline any callers.
      continue;
    }

    for (User *UU : U->users())
      Stack.push_back(UU);
  }
}

bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

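  // Redirect aliases that point directly at a function to the function itself;
  // calls made through an alias would otherwise not be inlined.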
  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

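  // If function calls are disabled, force every used function definition to be
  // always_inline. Under -amdgpu-stress-function-calls, instead mark every
  // function that is not already required to be inlined as noinline.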
  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

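  // Apply the attributes chosen above.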
  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}

ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  return new AMDGPUAlwaysInline(GlobalOpt);
}