//===-- AMDGPUAlwaysInlinePass.cpp - Inline all functions -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass replaces aliases of functions with the aliasee, forces inlining
/// of any function that uses an LDS global, and, when function calls are not
/// enabled, marks every remaining function as always_inline (or as noinline
/// when stressing function calls).
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

namespace {

static cl::opt<bool> StressCalls(
  "amdgpu-stress-function-calls",
  cl::Hidden,
  cl::desc("Force all functions to be noinline"),
  cl::init(false));

class AMDGPUAlwaysInline : public ModulePass {
  bool GlobalOpt;

  void recursivelyVisitUsers(GlobalValue &GV,
                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
public:
  static char ID;

  AMDGPUAlwaysInline(bool GlobalOpt = false) :
    ModulePass(ID), GlobalOpt(GlobalOpt) { }
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                "AMDGPU Inline All Functions", false, false)

char AMDGPUAlwaysInline::ID = 0;

// Walk the transitive users of GV. Any non-entry function reached, either
// because it uses GV directly or because it calls a function that does, must
// itself be inlined, so collect it in FuncsToAlwaysInline.
void AMDGPUAlwaysInline::recursivelyVisitUsers(
  GlobalValue &GV,
  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallVector<User *, 16> Stack;

  SmallPtrSet<const Value *, 8> Visited;

  for (User *U : GV.users())
    Stack.push_back(U);

  while (!Stack.empty()) {
    User *U = Stack.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (Instruction *I = dyn_cast<Instruction>(U)) {
      Function *F = I->getParent()->getParent();
      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
        FuncsToAlwaysInline.insert(F);
        Stack.push_back(F);
      }

      // No need to look at further users, but we do need to inline any callers.
      continue;
    }

    for (User *UU : U->users())
      Stack.push_back(UU);
  }
}

bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);

  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }
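  // For example, given "@bar = alias void (), void ()* @foo", uses of @bar
  // now refer to @foo directly; always_inline is only honored at direct call
  // sites of the aliasee, not at calls through the alias.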

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS.LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}

ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  return new AMDGPUAlwaysInline(GlobalOpt);
}
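
// Usage sketch: the AMDGPU target schedules this pass in its own pipeline;
// one way to run it standalone with the legacy pass manager would be
//
//   legacy::PassManager PM;
//   PM.add(createAMDGPUAlwaysInlinePass(/*GlobalOpt=*/true));
//   PM.run(M);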