145bb48eaSTom Stellard //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// 245bb48eaSTom Stellard // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 645bb48eaSTom Stellard // 745bb48eaSTom Stellard //===----------------------------------------------------------------------===// 845bb48eaSTom Stellard // 945bb48eaSTom Stellard /// \file 1045bb48eaSTom Stellard /// This pass marks all internal functions as always_inline and creates 11f9b521fdSAlfred Huang /// duplicates of all other functions and marks the duplicates as always_inline. 1245bb48eaSTom Stellard // 1345bb48eaSTom Stellard //===----------------------------------------------------------------------===// 1445bb48eaSTom Stellard 1545bb48eaSTom Stellard #include "AMDGPU.h" 16a680199aSMatt Arsenault #include "AMDGPUTargetMachine.h" 17a680199aSMatt Arsenault #include "Utils/AMDGPUBaseInfo.h" 1845bb48eaSTom Stellard #include "llvm/IR/Module.h" 196a87e9b0Sdfukalov #include "llvm/Pass.h" 206a87e9b0Sdfukalov #include "llvm/Support/CommandLine.h" 2145bb48eaSTom Stellard 2245bb48eaSTom Stellard using namespace llvm; 2345bb48eaSTom Stellard 2445bb48eaSTom Stellard namespace { 2545bb48eaSTom Stellard 261390af2dSMatt Arsenault static cl::opt<bool> StressCalls( 271390af2dSMatt Arsenault "amdgpu-stress-function-calls", 281390af2dSMatt Arsenault cl::Hidden, 291390af2dSMatt Arsenault cl::desc("Force all functions to be noinline"), 301390af2dSMatt Arsenault cl::init(false)); 311390af2dSMatt Arsenault 3245bb48eaSTom Stellard class AMDGPUAlwaysInline : public ModulePass { 3389653dfdSStanislav Mekhanoshin bool GlobalOpt; 3489653dfdSStanislav Mekhanoshin 3545bb48eaSTom Stellard public: 36746e0657SMatt Arsenault static char ID; 37746e0657SMatt Arsenault 38746e0657SMatt Arsenault AMDGPUAlwaysInline(bool GlobalOpt = false) : 39746e0657SMatt Arsenault ModulePass(ID), GlobalOpt(GlobalOpt) { } 4045bb48eaSTom Stellard bool runOnModule(Module &M) override; 41a680199aSMatt Arsenault 42a680199aSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 43a680199aSMatt Arsenault AU.setPreservesAll(); 44a680199aSMatt Arsenault } 4545bb48eaSTom Stellard }; 4645bb48eaSTom Stellard 4745bb48eaSTom Stellard } // End anonymous namespace 4845bb48eaSTom Stellard 49746e0657SMatt Arsenault INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", 50746e0657SMatt Arsenault "AMDGPU Inline All Functions", false, false) 51746e0657SMatt Arsenault 5245bb48eaSTom Stellard char AMDGPUAlwaysInline::ID = 0; 5345bb48eaSTom Stellard 544e838ba9SArthur Eubanks static void 554e838ba9SArthur Eubanks recursivelyVisitUsers(GlobalValue &GV, 56a680199aSMatt Arsenault SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { 5719aacdb7SKazu Hirata SmallVector<User *, 16> Stack(GV.users()); 58a680199aSMatt Arsenault 59a680199aSMatt Arsenault SmallPtrSet<const Value *, 8> Visited; 60a680199aSMatt Arsenault 61a680199aSMatt Arsenault while (!Stack.empty()) { 62a680199aSMatt Arsenault User *U = Stack.pop_back_val(); 63a680199aSMatt Arsenault if (!Visited.insert(U).second) 64a680199aSMatt Arsenault continue; 65a680199aSMatt Arsenault 66a680199aSMatt Arsenault if (Instruction *I = dyn_cast<Instruction>(U)) { 67a680199aSMatt Arsenault Function *F = I->getParent()->getParent(); 68a680199aSMatt Arsenault if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { 69c3d3c22aSMatt Arsenault // FIXME: This is a horrible hack. We should always respect noinline, 70c3d3c22aSMatt Arsenault // and just let us hit the error when we can't handle this. 71c3d3c22aSMatt Arsenault // 72c3d3c22aSMatt Arsenault // Unfortunately, clang adds noinline to all functions at -O0. We have 73*dc6e8dfdSJacob Lambert // to override this here until that's fixed. 74c3d3c22aSMatt Arsenault F->removeFnAttr(Attribute::NoInline); 75c3d3c22aSMatt Arsenault 76a680199aSMatt Arsenault FuncsToAlwaysInline.insert(F); 77a680199aSMatt Arsenault Stack.push_back(F); 78a680199aSMatt Arsenault } 79a680199aSMatt Arsenault 80a680199aSMatt Arsenault // No need to look at further users, but we do need to inline any callers. 81a680199aSMatt Arsenault continue; 82a680199aSMatt Arsenault } 83a680199aSMatt Arsenault 8405444417SKazu Hirata append_range(Stack, U->users()); 85a680199aSMatt Arsenault } 86a680199aSMatt Arsenault } 87a680199aSMatt Arsenault 884e838ba9SArthur Eubanks static bool alwaysInlineImpl(Module &M, bool GlobalOpt) { 89eba80895SNikolay Haustov std::vector<GlobalAlias*> AliasesToRemove; 90a680199aSMatt Arsenault 91a680199aSMatt Arsenault SmallPtrSet<Function *, 8> FuncsToAlwaysInline; 92a680199aSMatt Arsenault SmallPtrSet<Function *, 8> FuncsToNoInline; 93ca95d441SMatt Arsenault 94eba80895SNikolay Haustov for (GlobalAlias &A : M.aliases()) { 95eba80895SNikolay Haustov if (Function* F = dyn_cast<Function>(A.getAliasee())) { 96eba80895SNikolay Haustov A.replaceAllUsesWith(F); 97eba80895SNikolay Haustov AliasesToRemove.push_back(&A); 98eba80895SNikolay Haustov } 99a680199aSMatt Arsenault 100a680199aSMatt Arsenault // FIXME: If the aliasee isn't a function, it's some kind of constant expr 101a680199aSMatt Arsenault // cast that won't be inlined through. 102eba80895SNikolay Haustov } 103eba80895SNikolay Haustov 10489653dfdSStanislav Mekhanoshin if (GlobalOpt) { 105eba80895SNikolay Haustov for (GlobalAlias* A : AliasesToRemove) { 106eba80895SNikolay Haustov A->eraseFromParent(); 107eba80895SNikolay Haustov } 10889653dfdSStanislav Mekhanoshin } 109eba80895SNikolay Haustov 110a680199aSMatt Arsenault // Always force inlining of any function that uses an LDS global address. This 111a680199aSMatt Arsenault // is something of a workaround because we don't have a way of supporting LDS 112a680199aSMatt Arsenault // objects defined in functions. LDS is always allocated by a kernel, and it 113a680199aSMatt Arsenault // is difficult to manage LDS usage if a function may be used by multiple 114a680199aSMatt Arsenault // kernels. 115a680199aSMatt Arsenault // 116a680199aSMatt Arsenault // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this 117a680199aSMatt Arsenault // should only appear when IPO passes manages to move LDs defined in a kernel 118a680199aSMatt Arsenault // into a single user function. 119a680199aSMatt Arsenault 120a680199aSMatt Arsenault for (GlobalVariable &GV : M.globals()) { 121a680199aSMatt Arsenault // TODO: Region address 122bc561662SMatt Arsenault unsigned AS = GV.getAddressSpace(); 1234973b0c4Shsmahesha if ((AS == AMDGPUAS::REGION_ADDRESS) || 1244973b0c4Shsmahesha (AS == AMDGPUAS::LOCAL_ADDRESS && 125106959acSVang Thao (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer()))) 126a680199aSMatt Arsenault recursivelyVisitUsers(GV, FuncsToAlwaysInline); 127a680199aSMatt Arsenault } 128a680199aSMatt Arsenault 129a680199aSMatt Arsenault if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { 1301390af2dSMatt Arsenault auto IncompatAttr 1311390af2dSMatt Arsenault = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; 1321390af2dSMatt Arsenault 133ca95d441SMatt Arsenault for (Function &F : M) { 134a680199aSMatt Arsenault if (!F.isDeclaration() && !F.use_empty() && 135a680199aSMatt Arsenault !F.hasFnAttribute(IncompatAttr)) { 136a680199aSMatt Arsenault if (StressCalls) { 137a680199aSMatt Arsenault if (!FuncsToAlwaysInline.count(&F)) 138a680199aSMatt Arsenault FuncsToNoInline.insert(&F); 139a680199aSMatt Arsenault } else 140a680199aSMatt Arsenault FuncsToAlwaysInline.insert(&F); 141a680199aSMatt Arsenault } 142a680199aSMatt Arsenault } 14345bb48eaSTom Stellard } 14445bb48eaSTom Stellard 145a680199aSMatt Arsenault for (Function *F : FuncsToAlwaysInline) 146a680199aSMatt Arsenault F->addFnAttr(Attribute::AlwaysInline); 14745bb48eaSTom Stellard 148a680199aSMatt Arsenault for (Function *F : FuncsToNoInline) 149a680199aSMatt Arsenault F->addFnAttr(Attribute::NoInline); 150a680199aSMatt Arsenault 151a680199aSMatt Arsenault return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); 15245bb48eaSTom Stellard } 15345bb48eaSTom Stellard 1544e838ba9SArthur Eubanks bool AMDGPUAlwaysInline::runOnModule(Module &M) { 1554e838ba9SArthur Eubanks return alwaysInlineImpl(M, GlobalOpt); 1564e838ba9SArthur Eubanks } 1574e838ba9SArthur Eubanks 15889653dfdSStanislav Mekhanoshin ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { 15989653dfdSStanislav Mekhanoshin return new AMDGPUAlwaysInline(GlobalOpt); 16045bb48eaSTom Stellard } 161a680199aSMatt Arsenault 1624e838ba9SArthur Eubanks PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M, 1634e838ba9SArthur Eubanks ModuleAnalysisManager &AM) { 1644e838ba9SArthur Eubanks alwaysInlineImpl(M, GlobalOpt); 1654e838ba9SArthur Eubanks return PreservedAnalyses::all(); 1664e838ba9SArthur Eubanks } 167