1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This family of functions performs manipulations on Modules.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/ModuleUtils.h"
15 #include "llvm/IR/DerivedTypes.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 using namespace llvm;
22 
23 static void appendToGlobalArray(const char *Array, Module &M, Function *F,
24                                 int Priority, Constant *Data) {
25   IRBuilder<> IRB(M.getContext());
26   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
27 
28   // Get the current set of static global constructors and add the new ctor
29   // to the list.
30   SmallVector<Constant *, 16> CurrentCtors;
31   StructType *EltTy;
32   if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
33     ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
34     StructType *OldEltTy = cast<StructType>(ATy->getElementType());
35     // Upgrade a 2-field global array type to the new 3-field format if needed.
36     if (Data && OldEltTy->getNumElements() < 3)
37       EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
38                               IRB.getInt8PtrTy());
39     else
40       EltTy = OldEltTy;
41     if (Constant *Init = GVCtor->getInitializer()) {
42       unsigned n = Init->getNumOperands();
43       CurrentCtors.reserve(n + 1);
44       for (unsigned i = 0; i != n; ++i) {
45         auto Ctor = cast<Constant>(Init->getOperand(i));
46         if (EltTy != OldEltTy)
47           Ctor =
48               ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
49                                   Ctor->getAggregateElement(1),
50                                   Constant::getNullValue(IRB.getInt8PtrTy()));
51         CurrentCtors.push_back(Ctor);
52       }
53     }
54     GVCtor->eraseFromParent();
55   } else {
56     // Use the new three-field struct if there isn't one already.
57     EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
58                             IRB.getInt8PtrTy());
59   }
60 
61   // Build a 2 or 3 field global_ctor entry.  We don't take a comdat key.
62   Constant *CSVals[3];
63   CSVals[0] = IRB.getInt32(Priority);
64   CSVals[1] = F;
65   // FIXME: Drop support for the two element form in LLVM 4.0.
66   if (EltTy->getNumElements() >= 3)
67     CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
68                      : Constant::getNullValue(IRB.getInt8PtrTy());
69   Constant *RuntimeCtorInit =
70       ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
71 
72   CurrentCtors.push_back(RuntimeCtorInit);
73 
74   // Create a new initializer.
75   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
76   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
77 
78   // Create the new global variable and replace all uses of
79   // the old global variable with the new one.
80   (void)new GlobalVariable(M, NewInit->getType(), false,
81                            GlobalValue::AppendingLinkage, NewInit, Array);
82 }
83 
84 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
85   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
86 }
87 
88 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
89   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
90 }
91 
92 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
93   GlobalVariable *GV = M.getGlobalVariable(Name);
94   SmallPtrSet<Constant *, 16> InitAsSet;
95   SmallVector<Constant *, 16> Init;
96   if (GV) {
97     ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
98     for (auto &Op : CA->operands()) {
99       Constant *C = cast_or_null<Constant>(Op);
100       if (InitAsSet.insert(C).second)
101         Init.push_back(C);
102     }
103     GV->eraseFromParent();
104   }
105 
106   Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
107   for (auto *V : Values) {
108     Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
109     if (InitAsSet.insert(C).second)
110       Init.push_back(C);
111   }
112 
113   if (Init.empty())
114     return;
115 
116   ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
117   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
118                                 ConstantArray::get(ATy, Init), Name);
119   GV->setSection("llvm.metadata");
120 }
121 
122 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
123   appendToUsedList(M, "llvm.used", Values);
124 }
125 
126 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
127   appendToUsedList(M, "llvm.compiler.used", Values);
128 }
129 
130 Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
131   if (isa<Function>(FuncOrBitcast))
132     return cast<Function>(FuncOrBitcast);
133   FuncOrBitcast->print(errs());
134   errs() << '\n';
135   std::string Err;
136   raw_string_ostream Stream(Err);
137   Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
138   report_fatal_error(Err);
139 }
140 
141 Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
142                                              ArrayRef<Type *> InitArgTypes) {
143   assert(!InitName.empty() && "Expected init function name");
144   Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
145       InitName,
146       FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
147       AttributeList()));
148   F->setLinkage(Function::ExternalLinkage);
149   return F;
150 }
151 
152 std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
153     Module &M, StringRef CtorName, StringRef InitName,
154     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
155     StringRef VersionCheckName) {
156   assert(!InitName.empty() && "Expected init function name");
157   assert(InitArgs.size() == InitArgTypes.size() &&
158          "Sanitizer's init function expects different number of arguments");
159   Function *InitFunction =
160       declareSanitizerInitFunction(M, InitName, InitArgTypes);
161   Function *Ctor = Function::Create(
162       FunctionType::get(Type::getVoidTy(M.getContext()), false),
163       GlobalValue::InternalLinkage, CtorName, &M);
164   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
165   IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
166   IRB.CreateCall(InitFunction, InitArgs);
167   if (!VersionCheckName.empty()) {
168     Function *VersionCheckFunction =
169         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
170             VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
171             AttributeList()));
172     IRB.CreateCall(VersionCheckFunction, {});
173   }
174   return std::make_pair(Ctor, InitFunction);
175 }
176 
177 std::pair<Function *, Function *>
178 llvm::getOrCreateSanitizerCtorAndInitFunctions(
179     Module &M, StringRef CtorName, StringRef InitName,
180     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
181     function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
182     StringRef VersionCheckName) {
183   assert(!CtorName.empty() && "Expected ctor function name");
184 
185   if (Function *Ctor = M.getFunction(CtorName))
186     // FIXME: Sink this logic into the module, similar to the handling of
187     // globals. This will make moving to a concurrent model much easier.
188     if (Ctor->arg_size() == 0 ||
189         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
190       return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
191 
192   Function *Ctor, *InitFunction;
193   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
194       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
195   FunctionsCreatedCallback(Ctor, InitFunction);
196   return std::make_pair(Ctor, InitFunction);
197 }
198 
199 Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
200   assert(!Name.empty() && "Expected init function name");
201   if (Function *F = M.getFunction(Name)) {
202     if (F->arg_size() != 0 ||
203         F->getReturnType() != Type::getVoidTy(M.getContext())) {
204       std::string Err;
205       raw_string_ostream Stream(Err);
206       Stream << "Sanitizer interface function defined with wrong type: " << *F;
207       report_fatal_error(Err);
208     }
209     return F;
210   }
211   Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
212       Name, AttributeList(), Type::getVoidTy(M.getContext())));
213   F->setLinkage(Function::ExternalLinkage);
214 
215   appendToGlobalCtors(M, F, 0);
216 
217   return F;
218 }
219 
220 void llvm::filterDeadComdatFunctions(
221     Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
222   // Build a map from the comdat to the number of entries in that comdat we
223   // think are dead. If this fully covers the comdat group, then the entire
224   // group is dead. If we find another entry in the comdat group though, we'll
225   // have to preserve the whole group.
226   SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
227   for (Function *F : DeadComdatFunctions) {
228     Comdat *C = F->getComdat();
229     assert(C && "Expected all input GVs to be in a comdat!");
230     ComdatEntriesCovered[C] += 1;
231   }
232 
233   auto CheckComdat = [&](Comdat &C) {
234     auto CI = ComdatEntriesCovered.find(&C);
235     if (CI == ComdatEntriesCovered.end())
236       return;
237 
238     // If this could have been covered by a dead entry, just subtract one to
239     // account for it.
240     if (CI->second > 0) {
241       CI->second -= 1;
242       return;
243     }
244 
245     // If we've already accounted for all the entries that were dead, the
246     // entire comdat is alive so remove it from the map.
247     ComdatEntriesCovered.erase(CI);
248   };
249 
250   auto CheckAllComdats = [&] {
251     for (Function &F : M.functions())
252       if (Comdat *C = F.getComdat()) {
253         CheckComdat(*C);
254         if (ComdatEntriesCovered.empty())
255           return;
256       }
257     for (GlobalVariable &GV : M.globals())
258       if (Comdat *C = GV.getComdat()) {
259         CheckComdat(*C);
260         if (ComdatEntriesCovered.empty())
261           return;
262       }
263     for (GlobalAlias &GA : M.aliases())
264       if (Comdat *C = GA.getComdat()) {
265         CheckComdat(*C);
266         if (ComdatEntriesCovered.empty())
267           return;
268       }
269   };
270   CheckAllComdats();
271 
272   if (ComdatEntriesCovered.empty()) {
273     DeadComdatFunctions.clear();
274     return;
275   }
276 
277   // Remove the entries that were not covering.
278   erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
279     return ComdatEntriesCovered.find(GV->getComdat()) ==
280            ComdatEntriesCovered.end();
281   });
282 }
283 
284 std::string llvm::getUniqueModuleId(Module *M) {
285   MD5 Md5;
286   bool ExportsSymbols = false;
287   auto AddGlobal = [&](GlobalValue &GV) {
288     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
289         !GV.hasExternalLinkage() || GV.hasComdat())
290       return;
291     ExportsSymbols = true;
292     Md5.update(GV.getName());
293     Md5.update(ArrayRef<uint8_t>{0});
294   };
295 
296   for (auto &F : *M)
297     AddGlobal(F);
298   for (auto &GV : M->globals())
299     AddGlobal(GV);
300   for (auto &GA : M->aliases())
301     AddGlobal(GA);
302   for (auto &IF : M->ifuncs())
303     AddGlobal(IF);
304 
305   if (!ExportsSymbols)
306     return "";
307 
308   MD5::MD5Result R;
309   Md5.final(R);
310 
311   SmallString<32> Str;
312   MD5::stringifyResult(R, Str);
313   return ("$" + Str).str();
314 }
315