1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/IR/DerivedTypes.h" 15 #include "llvm/IR/Function.h" 16 #include "llvm/IR/IRBuilder.h" 17 #include "llvm/IR/Module.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 using namespace llvm; 21 22 static void appendToGlobalArray(const char *Array, Module &M, Function *F, 23 int Priority, Constant *Data) { 24 IRBuilder<> IRB(M.getContext()); 25 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 26 27 // Get the current set of static global constructors and add the new ctor 28 // to the list. 29 SmallVector<Constant *, 16> CurrentCtors; 30 StructType *EltTy; 31 if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { 32 ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType()); 33 StructType *OldEltTy = cast<StructType>(ATy->getElementType()); 34 // Upgrade a 2-field global array type to the new 3-field format if needed. 35 if (Data && OldEltTy->getNumElements() < 3) 36 EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), 37 IRB.getInt8PtrTy()); 38 else 39 EltTy = OldEltTy; 40 if (Constant *Init = GVCtor->getInitializer()) { 41 unsigned n = Init->getNumOperands(); 42 CurrentCtors.reserve(n + 1); 43 for (unsigned i = 0; i != n; ++i) { 44 auto Ctor = cast<Constant>(Init->getOperand(i)); 45 if (EltTy != OldEltTy) 46 Ctor = 47 ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0), 48 Ctor->getAggregateElement(1), 49 Constant::getNullValue(IRB.getInt8PtrTy())); 50 CurrentCtors.push_back(Ctor); 51 } 52 } 53 GVCtor->eraseFromParent(); 54 } else { 55 // Use the new three-field struct if there isn't one already. 56 EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), 57 IRB.getInt8PtrTy()); 58 } 59 60 // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. 61 Constant *CSVals[3]; 62 CSVals[0] = IRB.getInt32(Priority); 63 CSVals[1] = F; 64 // FIXME: Drop support for the two element form in LLVM 4.0. 65 if (EltTy->getNumElements() >= 3) 66 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) 67 : Constant::getNullValue(IRB.getInt8PtrTy()); 68 Constant *RuntimeCtorInit = 69 ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); 70 71 CurrentCtors.push_back(RuntimeCtorInit); 72 73 // Create a new initializer. 74 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 75 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 76 77 // Create the new global variable and replace all uses of 78 // the old global variable with the new one. 79 (void)new GlobalVariable(M, NewInit->getType(), false, 80 GlobalValue::AppendingLinkage, NewInit, Array); 81 } 82 83 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 84 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 85 } 86 87 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 88 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 89 } 90 91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 92 GlobalVariable *GV = M.getGlobalVariable(Name); 93 SmallPtrSet<Constant *, 16> InitAsSet; 94 SmallVector<Constant *, 16> Init; 95 if (GV) { 96 ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); 97 for (auto &Op : CA->operands()) { 98 Constant *C = cast_or_null<Constant>(Op); 99 if (InitAsSet.insert(C).second) 100 Init.push_back(C); 101 } 102 GV->eraseFromParent(); 103 } 104 105 Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext()); 106 for (auto *V : Values) { 107 Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy); 108 if (InitAsSet.insert(C).second) 109 Init.push_back(C); 110 } 111 112 if (Init.empty()) 113 return; 114 115 ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size()); 116 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 117 ConstantArray::get(ATy, Init), Name); 118 GV->setSection("llvm.metadata"); 119 } 120 121 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 122 appendToUsedList(M, "llvm.used", Values); 123 } 124 125 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 126 appendToUsedList(M, "llvm.compiler.used", Values); 127 } 128 129 Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { 130 if (isa<Function>(FuncOrBitcast)) 131 return cast<Function>(FuncOrBitcast); 132 FuncOrBitcast->print(errs()); 133 errs() << '\n'; 134 std::string Err; 135 raw_string_ostream Stream(Err); 136 Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; 137 report_fatal_error(Err); 138 } 139 140 Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 141 ArrayRef<Type *> InitArgTypes) { 142 assert(!InitName.empty() && "Expected init function name"); 143 Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction( 144 InitName, 145 FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false), 146 AttributeList())); 147 F->setLinkage(Function::ExternalLinkage); 148 return F; 149 } 150 151 std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( 152 Module &M, StringRef CtorName, StringRef InitName, 153 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 154 StringRef VersionCheckName) { 155 assert(!InitName.empty() && "Expected init function name"); 156 assert(InitArgs.size() == InitArgTypes.size() && 157 "Sanitizer's init function expects different number of arguments"); 158 Function *InitFunction = 159 declareSanitizerInitFunction(M, InitName, InitArgTypes); 160 Function *Ctor = Function::Create( 161 FunctionType::get(Type::getVoidTy(M.getContext()), false), 162 GlobalValue::InternalLinkage, CtorName, &M); 163 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 164 IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); 165 IRB.CreateCall(InitFunction, InitArgs); 166 if (!VersionCheckName.empty()) { 167 Function *VersionCheckFunction = 168 checkSanitizerInterfaceFunction(M.getOrInsertFunction( 169 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 170 AttributeList())); 171 IRB.CreateCall(VersionCheckFunction, {}); 172 } 173 return std::make_pair(Ctor, InitFunction); 174 } 175 176 std::pair<Function *, Function *> 177 llvm::getOrCreateSanitizerCtorAndInitFunctions( 178 Module &M, StringRef CtorName, StringRef InitName, 179 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 180 function_ref<void(Function *, Function *)> FunctionsCreatedCallback, 181 StringRef VersionCheckName) { 182 assert(!CtorName.empty() && "Expected ctor function name"); 183 184 if (Function *Ctor = M.getFunction(CtorName)) 185 // FIXME: Sink this logic into the module, similar to the handling of 186 // globals. This will make moving to a concurrent model much easier. 187 if (Ctor->arg_size() == 0 || 188 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 189 return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)}; 190 191 Function *Ctor, *InitFunction; 192 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 193 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName); 194 FunctionsCreatedCallback(Ctor, InitFunction); 195 return std::make_pair(Ctor, InitFunction); 196 } 197 198 Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) { 199 assert(!Name.empty() && "Expected init function name"); 200 if (Function *F = M.getFunction(Name)) { 201 if (F->arg_size() != 0 || 202 F->getReturnType() != Type::getVoidTy(M.getContext())) { 203 std::string Err; 204 raw_string_ostream Stream(Err); 205 Stream << "Sanitizer interface function defined with wrong type: " << *F; 206 report_fatal_error(Err); 207 } 208 return F; 209 } 210 Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction( 211 Name, AttributeList(), Type::getVoidTy(M.getContext()))); 212 F->setLinkage(Function::ExternalLinkage); 213 214 appendToGlobalCtors(M, F, 0); 215 216 return F; 217 } 218 219 void llvm::filterDeadComdatFunctions( 220 Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) { 221 // Build a map from the comdat to the number of entries in that comdat we 222 // think are dead. If this fully covers the comdat group, then the entire 223 // group is dead. If we find another entry in the comdat group though, we'll 224 // have to preserve the whole group. 225 SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered; 226 for (Function *F : DeadComdatFunctions) { 227 Comdat *C = F->getComdat(); 228 assert(C && "Expected all input GVs to be in a comdat!"); 229 ComdatEntriesCovered[C] += 1; 230 } 231 232 auto CheckComdat = [&](Comdat &C) { 233 auto CI = ComdatEntriesCovered.find(&C); 234 if (CI == ComdatEntriesCovered.end()) 235 return; 236 237 // If this could have been covered by a dead entry, just subtract one to 238 // account for it. 239 if (CI->second > 0) { 240 CI->second -= 1; 241 return; 242 } 243 244 // If we've already accounted for all the entries that were dead, the 245 // entire comdat is alive so remove it from the map. 246 ComdatEntriesCovered.erase(CI); 247 }; 248 249 auto CheckAllComdats = [&] { 250 for (Function &F : M.functions()) 251 if (Comdat *C = F.getComdat()) { 252 CheckComdat(*C); 253 if (ComdatEntriesCovered.empty()) 254 return; 255 } 256 for (GlobalVariable &GV : M.globals()) 257 if (Comdat *C = GV.getComdat()) { 258 CheckComdat(*C); 259 if (ComdatEntriesCovered.empty()) 260 return; 261 } 262 for (GlobalAlias &GA : M.aliases()) 263 if (Comdat *C = GA.getComdat()) { 264 CheckComdat(*C); 265 if (ComdatEntriesCovered.empty()) 266 return; 267 } 268 }; 269 CheckAllComdats(); 270 271 if (ComdatEntriesCovered.empty()) { 272 DeadComdatFunctions.clear(); 273 return; 274 } 275 276 // Remove the entries that were not covering. 277 erase_if(DeadComdatFunctions, [&](GlobalValue *GV) { 278 return ComdatEntriesCovered.find(GV->getComdat()) == 279 ComdatEntriesCovered.end(); 280 }); 281 } 282 283 std::string llvm::getUniqueModuleId(Module *M) { 284 MD5 Md5; 285 bool ExportsSymbols = false; 286 auto AddGlobal = [&](GlobalValue &GV) { 287 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 288 !GV.hasExternalLinkage() || GV.hasComdat()) 289 return; 290 ExportsSymbols = true; 291 Md5.update(GV.getName()); 292 Md5.update(ArrayRef<uint8_t>{0}); 293 }; 294 295 for (auto &F : *M) 296 AddGlobal(F); 297 for (auto &GV : M->globals()) 298 AddGlobal(GV); 299 for (auto &GA : M->aliases()) 300 AddGlobal(GA); 301 for (auto &IF : M->ifuncs()) 302 AddGlobal(IF); 303 304 if (!ExportsSymbols) 305 return ""; 306 307 MD5::MD5Result R; 308 Md5.final(R); 309 310 SmallString<32> Str; 311 MD5::stringifyResult(R, Str); 312 return ("$" + Str).str(); 313 } 314