//===-- Internalize.cpp - Mark functions internal -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass loops over all of the functions and variables in the input module.
// If the function or variable is not in the list of external names given to
// the pass it is marked as internal.
//
// This transformation would not be legal in a regular compilation, but it gets
// extra information from the linker about what is safe.
//
// For example: Internalizing a function with external linkage. This is safe
// only if we are told that the function is used solely from within this module.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
using namespace llvm;

#define DEBUG_TYPE "internalize"

STATISTIC(NumAliases , "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals , "Number of global vars internalized");

// APIFile - A file which contains a list of symbols that should not be marked
// internal.
45 static cl::opt<std::string> 46 APIFile("internalize-public-api-file", cl::value_desc("filename"), 47 cl::desc("A file containing list of symbol names to preserve")); 48 49 // APIList - A list of symbols that should not be marked internal. 50 static cl::list<std::string> 51 APIList("internalize-public-api-list", cl::value_desc("list"), 52 cl::desc("A list of symbol names to preserve"), 53 cl::CommaSeparated); 54 55 namespace { 56 class InternalizePass : public ModulePass { 57 std::set<std::string> ExternalNames; 58 public: 59 static char ID; // Pass identification, replacement for typeid 60 explicit InternalizePass(); 61 explicit InternalizePass(ArrayRef<const char *> ExportList); 62 void LoadFile(const char *Filename); 63 bool maybeInternalize(GlobalValue &GV, 64 const std::set<const Comdat *> &ExternalComdats); 65 void checkComdatVisibility(GlobalValue &GV, 66 std::set<const Comdat *> &ExternalComdats); 67 bool runOnModule(Module &M) override; 68 69 void getAnalysisUsage(AnalysisUsage &AU) const override { 70 AU.setPreservesCFG(); 71 AU.addPreserved<CallGraphWrapperPass>(); 72 } 73 }; 74 } // end anonymous namespace 75 76 char InternalizePass::ID = 0; 77 INITIALIZE_PASS(InternalizePass, "internalize", 78 "Internalize Global Symbols", false, false) 79 80 InternalizePass::InternalizePass() : ModulePass(ID) { 81 initializeInternalizePassPass(*PassRegistry::getPassRegistry()); 82 if (!APIFile.empty()) // If a filename is specified, use it. 83 LoadFile(APIFile.c_str()); 84 ExternalNames.insert(APIList.begin(), APIList.end()); 85 } 86 87 InternalizePass::InternalizePass(ArrayRef<const char *> ExportList) 88 : ModulePass(ID) { 89 initializeInternalizePassPass(*PassRegistry::getPassRegistry()); 90 for(ArrayRef<const char *>::const_iterator itr = ExportList.begin(); 91 itr != ExportList.end(); itr++) { 92 ExternalNames.insert(*itr); 93 } 94 } 95 96 void InternalizePass::LoadFile(const char *Filename) { 97 // Load the APIFile... 
98 std::ifstream In(Filename); 99 if (!In.good()) { 100 errs() << "WARNING: Internalize couldn't load file '" << Filename 101 << "'! Continuing as if it's empty.\n"; 102 return; // Just continue as if the file were empty 103 } 104 while (In) { 105 std::string Symbol; 106 In >> Symbol; 107 if (!Symbol.empty()) 108 ExternalNames.insert(Symbol); 109 } 110 } 111 112 static bool isExternallyVisible(const GlobalValue &GV, 113 const std::set<std::string> &ExternalNames) { 114 // Function must be defined here 115 if (GV.isDeclaration()) 116 return true; 117 118 // Available externally is really just a "declaration with a body". 119 if (GV.hasAvailableExternallyLinkage()) 120 return true; 121 122 // Assume that dllexported symbols are referenced elsewhere 123 if (GV.hasDLLExportStorageClass()) 124 return true; 125 126 // Marked to keep external? 127 if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) 128 return true; 129 130 return false; 131 } 132 133 // Internalize GV if it is possible to do so, i.e. it is not externally visible 134 // and is not a member of an externally visible comdat. 135 bool InternalizePass::maybeInternalize( 136 GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) { 137 if (Comdat *C = GV.getComdat()) { 138 if (ExternalComdats.count(C)) 139 return false; 140 141 // If a comdat is not externally visible we can drop it. 142 if (auto GO = dyn_cast<GlobalObject>(&GV)) 143 GO->setComdat(nullptr); 144 145 if (GV.hasLocalLinkage()) 146 return false; 147 } else { 148 if (GV.hasLocalLinkage()) 149 return false; 150 151 if (isExternallyVisible(GV, ExternalNames)) 152 return false; 153 } 154 155 GV.setVisibility(GlobalValue::DefaultVisibility); 156 GV.setLinkage(GlobalValue::InternalLinkage); 157 return true; 158 } 159 160 // If GV is part of a comdat and is externally visible, keep track of its 161 // comdat so that we don't internalize any of its members. 
162 void InternalizePass::checkComdatVisibility( 163 GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) { 164 Comdat *C = GV.getComdat(); 165 if (!C) 166 return; 167 168 if (isExternallyVisible(GV, ExternalNames)) 169 ExternalComdats.insert(C); 170 } 171 172 bool InternalizePass::runOnModule(Module &M) { 173 CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); 174 CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; 175 CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; 176 177 SmallPtrSet<GlobalValue *, 8> Used; 178 collectUsedGlobalVariables(M, Used, false); 179 180 // Collect comdat visiblity information for the module. 181 std::set<const Comdat *> ExternalComdats; 182 if (!M.getComdatSymbolTable().empty()) { 183 for (Function &F : M) 184 checkComdatVisibility(F, ExternalComdats); 185 for (GlobalVariable &GV : M.globals()) 186 checkComdatVisibility(GV, ExternalComdats); 187 for (GlobalAlias &GA : M.aliases()) 188 checkComdatVisibility(GA, ExternalComdats); 189 } 190 191 // We must assume that globals in llvm.used have a reference that not even 192 // the linker can see, so we don't internalize them. 193 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and 194 // linker can drop those symbols. If this pass is running as part of LTO, 195 // one might think that it could just drop llvm.compiler.used. The problem 196 // is that even in LTO llvm doesn't see every reference. For example, 197 // we don't see references from function local inline assembly. To be 198 // conservative, we internalize symbols in llvm.compiler.used, but we 199 // keep llvm.compiler.used so that the symbol is not deleted by llvm. 200 for (GlobalValue *V : Used) { 201 ExternalNames.insert(V->getName()); 202 } 203 204 // Mark all functions not in the api as internal. 
205 for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { 206 if (!maybeInternalize(*I, ExternalComdats)) 207 continue; 208 209 if (ExternalNode) 210 // Remove a callgraph edge from the external node to this function. 211 ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); 212 213 ++NumFunctions; 214 DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); 215 } 216 217 // Never internalize the llvm.used symbol. It is used to implement 218 // attribute((used)). 219 // FIXME: Shouldn't this just filter on llvm.metadata section?? 220 ExternalNames.insert("llvm.used"); 221 ExternalNames.insert("llvm.compiler.used"); 222 223 // Never internalize anchors used by the machine module info, else the info 224 // won't find them. (see MachineModuleInfo.) 225 ExternalNames.insert("llvm.global_ctors"); 226 ExternalNames.insert("llvm.global_dtors"); 227 ExternalNames.insert("llvm.global.annotations"); 228 229 // Never internalize symbols code-gen inserts. 230 // FIXME: We should probably add this (and the __stack_chk_guard) via some 231 // type of call-back in CodeGen. 232 ExternalNames.insert("__stack_chk_fail"); 233 ExternalNames.insert("__stack_chk_guard"); 234 235 // Mark all global variables with initializers that are not in the api as 236 // internal as well. 237 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 238 I != E; ++I) { 239 if (!maybeInternalize(*I, ExternalComdats)) 240 continue; 241 242 ++NumGlobals; 243 DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); 244 } 245 246 // Mark all aliases that are not in the api as internal as well. 
247 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 248 I != E; ++I) { 249 if (!maybeInternalize(*I, ExternalComdats)) 250 continue; 251 252 ++NumAliases; 253 DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); 254 } 255 256 // We do not keep track of whether this pass changed the module because 257 // it adds unnecessary complexity: 258 // 1) This pass will generally be near the start of the pass pipeline, so 259 // there will be no analyses to invalidate. 260 // 2) This pass will most likely end up changing the module and it isn't worth 261 // worrying about optimizing the case where the module is unchanged. 262 return true; 263 } 264 265 ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } 266 267 ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) { 268 return new InternalizePass(ExportList); 269 } 270