1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/IR/DerivedTypes.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/IRBuilder.h"
17 #include "llvm/IR/Module.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 using namespace llvm;
21 
22 static void appendToGlobalArray(const char *Array, Module &M, Function *F,
23                                 int Priority, Constant *Data) {
24   IRBuilder<> IRB(M.getContext());
25   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
26 
27   // Get the current set of static global constructors and add the new ctor
28   // to the list.
29   SmallVector<Constant *, 16> CurrentCtors;
30   StructType *EltTy;
31   if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
32     ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
33     StructType *OldEltTy = cast<StructType>(ATy->getElementType());
34     // Upgrade a 2-field global array type to the new 3-field format if needed.
35     if (Data && OldEltTy->getNumElements() < 3)
36       EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
37                               IRB.getInt8PtrTy());
38     else
39       EltTy = OldEltTy;
40     if (Constant *Init = GVCtor->getInitializer()) {
41       unsigned n = Init->getNumOperands();
42       CurrentCtors.reserve(n + 1);
43       for (unsigned i = 0; i != n; ++i) {
44         auto Ctor = cast<Constant>(Init->getOperand(i));
45         if (EltTy != OldEltTy)
46           Ctor =
47               ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
48                                   Ctor->getAggregateElement(1),
49                                   Constant::getNullValue(IRB.getInt8PtrTy()));
50         CurrentCtors.push_back(Ctor);
51       }
52     }
53     GVCtor->eraseFromParent();
54   } else {
55     // Use the new three-field struct if there isn't one already.
56     EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
57                             IRB.getInt8PtrTy());
58   }
59 
60   // Build a 2 or 3 field global_ctor entry.  We don't take a comdat key.
61   Constant *CSVals[3];
62   CSVals[0] = IRB.getInt32(Priority);
63   CSVals[1] = F;
64   // FIXME: Drop support for the two element form in LLVM 4.0.
65   if (EltTy->getNumElements() >= 3)
66     CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
67                      : Constant::getNullValue(IRB.getInt8PtrTy());
68   Constant *RuntimeCtorInit =
69       ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
70 
71   CurrentCtors.push_back(RuntimeCtorInit);
72 
73   // Create a new initializer.
74   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
75   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
76 
77   // Create the new global variable and replace all uses of
78   // the old global variable with the new one.
79   (void)new GlobalVariable(M, NewInit->getType(), false,
80                            GlobalValue::AppendingLinkage, NewInit, Array);
81 }
82 
83 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
84   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
85 }
86 
87 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
88   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
89 }
90 
91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
92   GlobalVariable *GV = M.getGlobalVariable(Name);
93   SmallPtrSet<Constant *, 16> InitAsSet;
94   SmallVector<Constant *, 16> Init;
95   if (GV) {
96     ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
97     for (auto &Op : CA->operands()) {
98       Constant *C = cast_or_null<Constant>(Op);
99       if (InitAsSet.insert(C).second)
100         Init.push_back(C);
101     }
102     GV->eraseFromParent();
103   }
104 
105   Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
106   for (auto *V : Values) {
107     Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
108     if (InitAsSet.insert(C).second)
109       Init.push_back(C);
110   }
111 
112   if (Init.empty())
113     return;
114 
115   ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
116   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
117                                 ConstantArray::get(ATy, Init), Name);
118   GV->setSection("llvm.metadata");
119 }
120 
121 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
122   appendToUsedList(M, "llvm.used", Values);
123 }
124 
125 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
126   appendToUsedList(M, "llvm.compiler.used", Values);
127 }
128 
129 Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
130   if (isa<Function>(FuncOrBitcast))
131     return cast<Function>(FuncOrBitcast);
132   FuncOrBitcast->print(errs());
133   errs() << '\n';
134   std::string Err;
135   raw_string_ostream Stream(Err);
136   Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
137   report_fatal_error(Err);
138 }
139 
140 Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
141                                              ArrayRef<Type *> InitArgTypes) {
142   assert(!InitName.empty() && "Expected init function name");
143   Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
144       InitName,
145       FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
146       AttributeList()));
147   F->setLinkage(Function::ExternalLinkage);
148   return F;
149 }
150 
151 std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
152     Module &M, StringRef CtorName, StringRef InitName,
153     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
154     StringRef VersionCheckName) {
155   assert(!InitName.empty() && "Expected init function name");
156   assert(InitArgs.size() == InitArgTypes.size() &&
157          "Sanitizer's init function expects different number of arguments");
158   Function *InitFunction =
159       declareSanitizerInitFunction(M, InitName, InitArgTypes);
160   Function *Ctor = Function::Create(
161       FunctionType::get(Type::getVoidTy(M.getContext()), false),
162       GlobalValue::InternalLinkage, CtorName, &M);
163   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
164   IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
165   IRB.CreateCall(InitFunction, InitArgs);
166   if (!VersionCheckName.empty()) {
167     Function *VersionCheckFunction =
168         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
169             VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
170             AttributeList()));
171     IRB.CreateCall(VersionCheckFunction, {});
172   }
173   return std::make_pair(Ctor, InitFunction);
174 }
175 
176 std::pair<Function *, Function *>
177 llvm::getOrCreateSanitizerCtorAndInitFunctions(
178     Module &M, StringRef CtorName, StringRef InitName,
179     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
180     function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
181     StringRef VersionCheckName) {
182   assert(!CtorName.empty() && "Expected ctor function name");
183 
184   if (Function *Ctor = M.getFunction(CtorName))
185     // FIXME: Sink this logic into the module, similar to the handling of
186     // globals. This will make moving to a concurrent model much easier.
187     if (Ctor->arg_size() == 0 ||
188         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
189       return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
190 
191   Function *Ctor, *InitFunction;
192   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
193       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
194   FunctionsCreatedCallback(Ctor, InitFunction);
195   return std::make_pair(Ctor, InitFunction);
196 }
197 
198 Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
199   assert(!Name.empty() && "Expected init function name");
200   if (Function *F = M.getFunction(Name)) {
201     if (F->arg_size() != 0 ||
202         F->getReturnType() != Type::getVoidTy(M.getContext())) {
203       std::string Err;
204       raw_string_ostream Stream(Err);
205       Stream << "Sanitizer interface function defined with wrong type: " << *F;
206       report_fatal_error(Err);
207     }
208     return F;
209   }
210   Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
211       Name, AttributeList(), Type::getVoidTy(M.getContext())));
212   F->setLinkage(Function::ExternalLinkage);
213 
214   appendToGlobalCtors(M, F, 0);
215 
216   return F;
217 }
218 
219 void llvm::filterDeadComdatFunctions(
220     Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
221   // Build a map from the comdat to the number of entries in that comdat we
222   // think are dead. If this fully covers the comdat group, then the entire
223   // group is dead. If we find another entry in the comdat group though, we'll
224   // have to preserve the whole group.
225   SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
226   for (Function *F : DeadComdatFunctions) {
227     Comdat *C = F->getComdat();
228     assert(C && "Expected all input GVs to be in a comdat!");
229     ComdatEntriesCovered[C] += 1;
230   }
231 
232   auto CheckComdat = [&](Comdat &C) {
233     auto CI = ComdatEntriesCovered.find(&C);
234     if (CI == ComdatEntriesCovered.end())
235       return;
236 
237     // If this could have been covered by a dead entry, just subtract one to
238     // account for it.
239     if (CI->second > 0) {
240       CI->second -= 1;
241       return;
242     }
243 
244     // If we've already accounted for all the entries that were dead, the
245     // entire comdat is alive so remove it from the map.
246     ComdatEntriesCovered.erase(CI);
247   };
248 
249   auto CheckAllComdats = [&] {
250     for (Function &F : M.functions())
251       if (Comdat *C = F.getComdat()) {
252         CheckComdat(*C);
253         if (ComdatEntriesCovered.empty())
254           return;
255       }
256     for (GlobalVariable &GV : M.globals())
257       if (Comdat *C = GV.getComdat()) {
258         CheckComdat(*C);
259         if (ComdatEntriesCovered.empty())
260           return;
261       }
262     for (GlobalAlias &GA : M.aliases())
263       if (Comdat *C = GA.getComdat()) {
264         CheckComdat(*C);
265         if (ComdatEntriesCovered.empty())
266           return;
267       }
268   };
269   CheckAllComdats();
270 
271   if (ComdatEntriesCovered.empty()) {
272     DeadComdatFunctions.clear();
273     return;
274   }
275 
276   // Remove the entries that were not covering.
277   erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
278     return ComdatEntriesCovered.find(GV->getComdat()) ==
279            ComdatEntriesCovered.end();
280   });
281 }
282 
283 std::string llvm::getUniqueModuleId(Module *M) {
284   MD5 Md5;
285   bool ExportsSymbols = false;
286   auto AddGlobal = [&](GlobalValue &GV) {
287     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
288         !GV.hasExternalLinkage() || GV.hasComdat())
289       return;
290     ExportsSymbols = true;
291     Md5.update(GV.getName());
292     Md5.update(ArrayRef<uint8_t>{0});
293   };
294 
295   for (auto &F : *M)
296     AddGlobal(F);
297   for (auto &GV : M->globals())
298     AddGlobal(GV);
299   for (auto &GA : M->aliases())
300     AddGlobal(GA);
301   for (auto &IF : M->ifuncs())
302     AddGlobal(IF);
303 
304   if (!ExportsSymbols)
305     return "";
306 
307   MD5::MD5Result R;
308   Md5.final(R);
309 
310   SmallString<32> Str;
311   MD5::stringifyResult(R, Str);
312   return ("$" + Str).str();
313 }
314