1 //===-- Internalize.cpp - Mark functions internal -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass loops over all of the functions and variables in the input module. 11 // If the function or variable is not in the list of external names given to 12 // the pass it is marked as internal. 13 // 14 // This transformation would not be legal in a regular compilation, but it gets 15 // extra information from the linker about what is safe. 16 // 17 // For example: Internalizing a function with external linkage. Only if we are 18 // told it is only used from within this module, it is safe to do it. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/Transforms/IPO.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringSet.h" 26 #include "llvm/Analysis/CallGraph.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 #include "llvm/Transforms/Utils/GlobalStatus.h" 33 #include "llvm/Transforms/Utils/ModuleUtils.h" 34 #include <fstream> 35 #include <set> 36 using namespace llvm; 37 38 #define DEBUG_TYPE "internalize" 39 40 STATISTIC(NumAliases , "Number of aliases internalized"); 41 STATISTIC(NumFunctions, "Number of functions internalized"); 42 STATISTIC(NumGlobals , "Number of global vars internalized"); 43 44 // APIFile - A file which contains a list of symbols that should not be marked 45 // external. 46 static cl::opt<std::string> 47 APIFile("internalize-public-api-file", cl::value_desc("filename"), 48 cl::desc("A file containing list of symbol names to preserve")); 49 50 // APIList - A list of symbols that should not be marked internal. 51 static cl::list<std::string> 52 APIList("internalize-public-api-list", cl::value_desc("list"), 53 cl::desc("A list of symbol names to preserve"), 54 cl::CommaSeparated); 55 56 namespace { 57 class InternalizePass : public ModulePass { 58 StringSet<> ExternalNames; 59 60 public: 61 static char ID; // Pass identification, replacement for typeid 62 explicit InternalizePass(); 63 explicit InternalizePass(ArrayRef<const char *> ExportList); 64 explicit InternalizePass(StringSet<> ExportList); 65 void LoadFile(const char *Filename); 66 bool maybeInternalize(GlobalValue &GV, 67 const std::set<const Comdat *> &ExternalComdats); 68 void checkComdatVisibility(GlobalValue &GV, 69 std::set<const Comdat *> &ExternalComdats); 70 bool runOnModule(Module &M) override; 71 72 void getAnalysisUsage(AnalysisUsage &AU) const override { 73 AU.setPreservesCFG(); 74 AU.addPreserved<CallGraphWrapperPass>(); 75 } 76 }; 77 } // end anonymous namespace 78 79 char InternalizePass::ID = 0; 80 INITIALIZE_PASS(InternalizePass, "internalize", 81 "Internalize Global Symbols", false, false) 82 83 InternalizePass::InternalizePass() : ModulePass(ID) { 84 initializeInternalizePassPass(*PassRegistry::getPassRegistry()); 85 if (!APIFile.empty()) // If a filename is specified, use it. 86 LoadFile(APIFile.c_str()); 87 ExternalNames.insert(APIList.begin(), APIList.end()); 88 } 89 90 InternalizePass::InternalizePass(ArrayRef<const char *> ExportList) 91 : ModulePass(ID) { 92 initializeInternalizePassPass(*PassRegistry::getPassRegistry()); 93 for(ArrayRef<const char *>::const_iterator itr = ExportList.begin(); 94 itr != ExportList.end(); itr++) { 95 ExternalNames.insert(*itr); 96 } 97 } 98 99 InternalizePass::InternalizePass(StringSet<> ExportList) 100 : ModulePass(ID), ExternalNames(std::move(ExportList)) {} 101 102 void InternalizePass::LoadFile(const char *Filename) { 103 // Load the APIFile... 104 std::ifstream In(Filename); 105 if (!In.good()) { 106 errs() << "WARNING: Internalize couldn't load file '" << Filename 107 << "'! Continuing as if it's empty.\n"; 108 return; // Just continue as if the file were empty 109 } 110 while (In) { 111 std::string Symbol; 112 In >> Symbol; 113 if (!Symbol.empty()) 114 ExternalNames.insert(Symbol); 115 } 116 } 117 118 static bool isExternallyVisible(const GlobalValue &GV, 119 const StringSet<> &ExternalNames) { 120 // Function must be defined here 121 if (GV.isDeclaration()) 122 return true; 123 124 // Available externally is really just a "declaration with a body". 125 if (GV.hasAvailableExternallyLinkage()) 126 return true; 127 128 // Assume that dllexported symbols are referenced elsewhere 129 if (GV.hasDLLExportStorageClass()) 130 return true; 131 132 // Marked to keep external? 133 if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) 134 return true; 135 136 return false; 137 } 138 139 // Internalize GV if it is possible to do so, i.e. it is not externally visible 140 // and is not a member of an externally visible comdat. 141 bool InternalizePass::maybeInternalize( 142 GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) { 143 if (Comdat *C = GV.getComdat()) { 144 if (ExternalComdats.count(C)) 145 return false; 146 147 // If a comdat is not externally visible we can drop it. 148 if (auto GO = dyn_cast<GlobalObject>(&GV)) 149 GO->setComdat(nullptr); 150 151 if (GV.hasLocalLinkage()) 152 return false; 153 } else { 154 if (GV.hasLocalLinkage()) 155 return false; 156 157 if (isExternallyVisible(GV, ExternalNames)) 158 return false; 159 } 160 161 GV.setVisibility(GlobalValue::DefaultVisibility); 162 GV.setLinkage(GlobalValue::InternalLinkage); 163 return true; 164 } 165 166 // If GV is part of a comdat and is externally visible, keep track of its 167 // comdat so that we don't internalize any of its members. 168 void InternalizePass::checkComdatVisibility( 169 GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) { 170 Comdat *C = GV.getComdat(); 171 if (!C) 172 return; 173 174 if (isExternallyVisible(GV, ExternalNames)) 175 ExternalComdats.insert(C); 176 } 177 178 bool InternalizePass::runOnModule(Module &M) { 179 CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); 180 CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; 181 CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; 182 183 SmallPtrSet<GlobalValue *, 8> Used; 184 collectUsedGlobalVariables(M, Used, false); 185 186 // Collect comdat visiblity information for the module. 187 std::set<const Comdat *> ExternalComdats; 188 if (!M.getComdatSymbolTable().empty()) { 189 for (Function &F : M) 190 checkComdatVisibility(F, ExternalComdats); 191 for (GlobalVariable &GV : M.globals()) 192 checkComdatVisibility(GV, ExternalComdats); 193 for (GlobalAlias &GA : M.aliases()) 194 checkComdatVisibility(GA, ExternalComdats); 195 } 196 197 // We must assume that globals in llvm.used have a reference that not even 198 // the linker can see, so we don't internalize them. 199 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and 200 // linker can drop those symbols. If this pass is running as part of LTO, 201 // one might think that it could just drop llvm.compiler.used. The problem 202 // is that even in LTO llvm doesn't see every reference. For example, 203 // we don't see references from function local inline assembly. To be 204 // conservative, we internalize symbols in llvm.compiler.used, but we 205 // keep llvm.compiler.used so that the symbol is not deleted by llvm. 206 for (GlobalValue *V : Used) { 207 ExternalNames.insert(V->getName()); 208 } 209 210 // Mark all functions not in the api as internal. 211 for (Function &I : M) { 212 if (!maybeInternalize(I, ExternalComdats)) 213 continue; 214 215 if (ExternalNode) 216 // Remove a callgraph edge from the external node to this function. 217 ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); 218 219 ++NumFunctions; 220 DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); 221 } 222 223 // Never internalize the llvm.used symbol. It is used to implement 224 // attribute((used)). 225 // FIXME: Shouldn't this just filter on llvm.metadata section?? 226 ExternalNames.insert("llvm.used"); 227 ExternalNames.insert("llvm.compiler.used"); 228 229 // Never internalize anchors used by the machine module info, else the info 230 // won't find them. (see MachineModuleInfo.) 231 ExternalNames.insert("llvm.global_ctors"); 232 ExternalNames.insert("llvm.global_dtors"); 233 ExternalNames.insert("llvm.global.annotations"); 234 235 // Never internalize symbols code-gen inserts. 236 // FIXME: We should probably add this (and the __stack_chk_guard) via some 237 // type of call-back in CodeGen. 238 ExternalNames.insert("__stack_chk_fail"); 239 ExternalNames.insert("__stack_chk_guard"); 240 241 // Mark all global variables with initializers that are not in the api as 242 // internal as well. 243 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 244 I != E; ++I) { 245 if (!maybeInternalize(*I, ExternalComdats)) 246 continue; 247 248 ++NumGlobals; 249 DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); 250 } 251 252 // Mark all aliases that are not in the api as internal as well. 253 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 254 I != E; ++I) { 255 if (!maybeInternalize(*I, ExternalComdats)) 256 continue; 257 258 ++NumAliases; 259 DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); 260 } 261 262 // We do not keep track of whether this pass changed the module because 263 // it adds unnecessary complexity: 264 // 1) This pass will generally be near the start of the pass pipeline, so 265 // there will be no analyses to invalidate. 266 // 2) This pass will most likely end up changing the module and it isn't worth 267 // worrying about optimizing the case where the module is unchanged. 268 return true; 269 } 270 271 ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } 272 273 ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) { 274 return new InternalizePass(ExportList); 275 } 276 277 ModulePass *llvm::createInternalizePass(StringSet<> ExportList) { 278 return new InternalizePass(std::move(ExportList)); 279 } 280