//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners
// that detect multiple performance issues via separate sub-tools.
//
// The instrumentation phase is straightforward:
//   - Take action on every memory access: either inlined instrumentation,
//     or inserted calls to our run-time library.
//   - Optimizations may apply to avoid instrumenting some of the accesses.
//   - Turn mem{set,cpy,move} intrinsics into library calls.
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "esan"

// The tool type must be just one of these ClTool* options, as the tools
// cannot be combined due to shadow memory constraints.
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.

static cl::opt<bool> ClInstrumentLoadsAndStores(
    "esan-instrument-loads-and-stores", cl::init(true),
    cl::desc("Instrument loads and stores"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemIntrinsics(
    "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
static cl::opt<bool> ClInstrumentFastpath(
    "esan-instrument-fastpath", cl::init(true),
    cl::desc("Instrument fastpath"), cl::Hidden);
static cl::opt<bool> ClAuxFieldInfo(
    "esan-aux-field-info", cl::init(true),
    cl::desc("Generate binary with auxiliary struct field information"),
    cl::Hidden);

// Experiments show that the performance difference can be 2x or more,
// and accuracy loss is typically negligible, so we turn this on by default.
static cl::opt<bool> ClAssumeIntraCacheLine(
    "esan-assume-intra-cache-line", cl::init(true),
    cl::desc("Assume each memory access touches just one cache line, for "
             "better performance but with a potential loss of accuracy."),
    cl::Hidden);

STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
STATISTIC(NumAccessesWithIrregularSize,
          "Number of accesses with a size outside our targeted callout sizes");
STATISTIC(NumIgnoredStructs, "Number of ignored structs");
STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
STATISTIC(NumAssumedIntraCacheLine,
          "Number of accesses assumed to be intra-cache-line");

static const uint64_t EsanCtorAndDtorPriority = 0;
static const char *const EsanModuleCtorName = "esan.module_ctor";
static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// We need to specify the tool to the runtime earlier than
// the ctor is called in some cases, so we set a global variable.
static const char *const EsanWhichToolName = "__esan_which_tool";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
struct ShadowMemoryParams {
  uint64_t ShadowMask;
  uint64_t ShadowOffs[3];
};

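// The two variants presumably correspond to the target's virtual address
// width: initOnModule below selects ShadowParams40 for MIPS64 and
// ShadowParams47 for everything else (e.g., x86-64).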
static const ShadowMemoryParams ShadowParams47 = {
    0x00000fffffffffffull,
    {
        0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
    }};

static const ShadowMemoryParams ShadowParams40 = {
    0x0fffffffffull,
    {
        0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
    }};

// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
  0, // ESAN_None.
  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};

// MaxStructCounterNameSize is a soft size limit to avoid insanely long
// names for those extremely large structs.
static const unsigned MaxStructCounterNameSize = 512;

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;

  return Options;
}

/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  StringRef getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  bool initOnModule(Module &M);
  void initializeCallbacks(Module &M);
  bool shouldIgnoreStructType(StructType *StructTy);
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  void createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  void createDestructor(Module &M, Constant *ToolInfoArg);
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
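  // Counter array layout: for a struct with N fields, slots [0, N-1] hold
  // the per-field counters and slot N holds the whole-struct array-access
  // counter; e.g., for a two-field struct the array counter is at index 2.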
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0;
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
  }
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
  static const size_t NumberOfAccessSizes = 5;
  Function *EsanAlignedLoad[NumberOfAccessSizes];
  Function *EsanAlignedStore[NumberOfAccessSizes];
  Function *EsanUnalignedLoad[NumberOfAccessSizes];
  Function *EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
  ShadowMemoryParams ShadowParams;
};
} // namespace

char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)

StringRef EfficiencySanitizer::getPassName() const {
  return "EfficiencySanitizer";
}

void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}

ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
  return new EfficiencySanitizer(Options);
}

void EfficiencySanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(M.getContext());
  // Initialize the callbacks.
  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    const unsigned ByteSize = 1U << Idx;
    std::string ByteSizeStr = utostr(ByteSize);
    // We'll inline the most common cases (aligned accesses of the frequent
    // sizes) for load + store instrumentation: these callouts are for the
    // slowpath.
    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    EsanAlignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    EsanAlignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    EsanUnalignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    EsanUnalignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
  }
  EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemmoveFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemcpyFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemsetFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt32Ty(), IntptrTy));
}

bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    return true;
  return false;
}

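// For example (illustrative; the numeric ids are LLVM TypeIDs and vary with
// the field types), a two-field named struct %struct.Foo would produce a
// counter name of the form "struct.Foo$2$<id>$<id>".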
void EfficiencySanitizer::createStructCounterName(
    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
  // Append NumFields and field type ids to avoid struct conflicts
  // with the same name but different fields.
  if (StructTy->hasName())
    NameStr += StructTy->getName();
  else
    NameStr += "struct.anon";
  // We allow the actual size of the StructCounterName to be larger than
  // MaxStructCounterNameSize and append $NumFields and at least one
  // field type id.
  // Append $NumFields.
  NameStr += "$";
  Twine(StructTy->getNumElements()).toVector(NameStr);
  // Append struct field type ids in the reverse order.
  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    NameStr += "$";
    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    if (NameStr.size() >= MaxStructCounterNameSize)
      break;
  }
  if (StructTy->isLiteral()) {
    // End with $ for literal struct.
    NameStr += "$";
  }
}

// Create global variables with auxiliary information (e.g., struct field size,
// offset, and type name) for better user reports.
void EfficiencySanitizer::createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeName, GlobalVariable *&Offset,
    GlobalVariable *&Size) {
  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  // FieldTypeName.
  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
                                GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> TypeNameVec;
  // FieldOffset.
  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Offset = new GlobalVariable(M, OffsetArrayTy, true,
                              GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> OffsetVec;
  // FieldSize.
  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Size = new GlobalVariable(M, SizeArrayTy, true,
                            GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> SizeVec;
  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    Type *Ty = StructTy->getElementType(i);
    std::string Str;
    raw_string_ostream StrOS(Str);
    Ty->print(StrOS);
    TypeNameVec.push_back(
        ConstantExpr::getPointerCast(
            createPrivateGlobalForString(M, StrOS.str(), true),
            Int8PtrTy));
    OffsetVec.push_back(
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getElementOffset(i)));
    SizeVec.push_back(ConstantInt::get(Int32Ty,
                                       DL.getTypeAllocSize(Ty)));
  }
  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
}

// Create the global variable for the cache-fragmentation tool.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);

  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    // StructName.
    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // Counters.
    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
      new GlobalVariable(M, CounterArrayTy, false,
                         GlobalVariable::WeakAnyLinkage,
                         ConstantAggregateZero::get(CounterArrayTy),
                         CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    Initializers.push_back(ConstantStruct::get(
        StructInfoTy,
        ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getSizeInBytes()),
        ConstantInt::get(Int32Ty, StructTy->getNumElements()),
        Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                          : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
        Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                        : ConstantExpr::getPointerCast(Size, Int32PtrTy),
        TypeName == nullptr
            ? ConstantPointerNull::get(Int8PtrPtrTy)
            : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       FieldCounterIdx),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       ArrayCounterIdx)));
  }
  // Structs.
  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy, UnitName,
                          ConstantInt::get(Int32Ty, NumStructs), StructInfo));
  return CacheFragInfoGV;
}

// Create the tool-specific argument passed to EsanInit and EsanExit.
Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
                                                         const DataLayout &DL) {
  // This structure contains tool-specific information about each compilation
  // unit (module) and is passed to the runtime library.
  GlobalVariable *ToolInfoGV = nullptr;

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  // Compilation unit name.
  auto *UnitName = ConstantExpr::getPointerCast(
      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
      Int8PtrTy);

  // Create the tool-specific variable.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);

  if (ToolInfoGV != nullptr)
    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);

  // Create the null pointer if no tool-specific variable was created.
  return ConstantPointerNull::get(Int8PtrTy);
}

void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
                                                        false),
                                      GlobalValue::InternalLinkage,
                                      EsanModuleDtorName, &M);
  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
  Function *EsanExit = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
                            Int8PtrTy));
  EsanExit->setLinkage(Function::ExternalLinkage);
  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
}

bool EfficiencySanitizer::initOnModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  if (TargetTriple.isMIPS64())
    ShadowParams = ShadowParams40;
  else
    ShadowParams = ShadowParams47;

  Ctx = &M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> IRB(M.getContext());
  IntegerType *OrdTy = IRB.getInt32Ty();
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  IntptrTy = DL.getIntPtrType(M.getContext());
  // Create the variable passed to EsanInit and EsanExit.
  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
  // Constructor
  // We specify the tool type both in the EsanWhichToolName global
  // and as an arg to the init routine as a sanity check.
  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
      /*InitArgs=*/{
        ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
        ToolInfoArg});
  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);

  createDestructor(M, ToolInfoArg);

  new GlobalVariable(M, OrdTy, true,
                     GlobalValue::WeakAnyLinkage,
                     ConstantInt::get(OrdTy,
                                      static_cast<int>(Options.ToolType)),
                     EsanWhichToolName);

  return true;
}

Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
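  // Illustrative example for the working-set tool (Scale == 6) under the
  // 47-bit layout (ShadowParams47):
  //   App                    = 0x00007fff00001040
  //   App & Mask             = 0x00000fff00001040
  //   + (ShadowOffs[0] << 6) = 0x0004cfff00001040
  //   >> 6                   = 0x0000133ffc000041  (shadow byte address)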
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowParams.ShadowOffs[Scale];
  else
    Offs = ShadowParams.ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
}

bool EfficiencySanitizer::runOnModule(Module &M) {
  bool Res = initOnModule(M);
  initializeCallbacks(M);
  for (auto &F : M) {
    Res |= runOnFunction(F, M);
  }
  return Res;
}

bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
  // This is required to prevent instrumenting the call to __esan_init from
  // within the module constructor.
  if (&F == EsanCtorFunction)
    return false;
  SmallVector<Instruction *, 8> LoadsAndStores;
  SmallVector<Instruction *, 8> MemIntrinCalls;
  SmallVector<Instruction *, 8> GetElementPtrs;
  bool Res = false;
  const DataLayout &DL = M.getDataLayout();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
          !shouldIgnoreMemoryAccess(&Inst))
        LoadsAndStores.push_back(&Inst);
      else if (isa<MemIntrinsic>(Inst))
        MemIntrinCalls.push_back(&Inst);
      else if (isa<GetElementPtrInst>(Inst))
        GetElementPtrs.push_back(&Inst);
      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    }
  }

  if (ClInstrumentLoadsAndStores) {
    for (auto Inst : LoadsAndStores) {
      Res |= instrumentLoadOrStore(Inst, DL);
    }
  }

  if (ClInstrumentMemIntrinsics) {
    for (auto Inst : MemIntrinCalls) {
      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
  }

  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    for (auto Inst : GetElementPtrs) {
      Res |= instrumentGetElementPtr(Inst, M);
    }
  }

  return Res;
}

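// For example (illustrative), an aligned 4-byte load that is not handled by
// the fastpath turns into a call to __esan_aligned_load4(addr), while an
// irregularly sized access (say, 3 bytes) falls back to
// __esan_unaligned_loadN(addr, 3).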
bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
  if (Idx < 0) {
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}

// It's simplest to replace the memset/memmove/memcpy intrinsics with
// calls that the runtime library intercepts.
// Our pass is late enough that calls should not turn back into intrinsics.
bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  bool Res = false;
  if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemsetFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else
    llvm_unreachable("Unsupported mem intrinsic type");
  return Res;
}

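// For example (illustrative, assuming %struct.A is registered in
// StructTyMap), for
//   %p = getelementptr %struct.A, %struct.A* %base, i64 1, i32 2
// the non-zero first index bumps %struct.A's array counter and the constant
// field index 2 bumps the counter for its third field.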
bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
  bool Res = false;
  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    ++NumIgnoredGEPs;
    return false;
  }
  Type *SourceTy = GepInst->getSourceElementType();
  StructType *StructTy = nullptr;
  ConstantInt *Idx;
  // Check if the GEP calculates an address within a struct array.
  if (isa<StructType>(SourceTy)) {
    StructTy = cast<StructType>(SourceTy);
    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
  }
  // Iterate over all indices (except the first and the last) within each GEP
  // instruction for possible nested struct field address calculations.
  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
                                   GepInst->idx_begin() + i);
    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    unsigned CounterIdx = 0;
    if (isa<ArrayType>(Ty)) {
      ArrayType *ArrayTy = cast<ArrayType>(Ty);
      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // The last counter is for struct array accesses.
      CounterIdx = getArrayCounterIdx(StructTy);
    } else if (isa<StructType>(Ty)) {
      StructTy = cast<StructType>(Ty);
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // Get the StructTy's subfield index.
      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
      assert(Idx->getSExtValue() >= 0 &&
             Idx->getSExtValue() < StructTy->getNumElements());
      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    }
    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
  }
  if (Res)
    ++NumInstrumentedGEPs;
  else
    ++NumIgnoredGEPs;
  return Res;
}

bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
                                              StructType *StructTy,
                                              unsigned CounterIdx) {
  GlobalVariable *CounterArray = StructTyMap[StructTy];
  if (CounterArray == nullptr)
    return false;
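  // The code below emits a plain (non-atomic) load/add/store sequence,
  // i.e., Counters[CounterIdx]++.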
  IRBuilder<> IRB(I);
  Constant *Indices[2];
  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
  // http://llvm.org/docs/GetElementPtr.html.
  // The first index of the GEP instruction steps through the first operand,
  // i.e., the array itself.
  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
  // The second index is the index within the array.
  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
  Constant *Counter =
    ConstantExpr::getGetElementPtr(
        ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
        CounterArray, Indices);
  Value *Load = IRB.CreateLoad(Counter);
  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
                  Counter);
  return true;
}

int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
                                                  const DataLayout &DL) {
  Type *OrigPtrTy = Addr->getType();
  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
  assert(OrigTy->isSized());
  // The size is always a multiple of 8 bits.
  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    // Irregular sizes do not have per-size call targets.
    NumAccessesWithIrregularSize++;
    return -1;
  }
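  // Map the power-of-two byte size to a callout table index:
  // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4.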
  size_t Idx = countTrailingZeros(TypeSizeBytes);
  assert(Idx < NumberOfAccessSizes);
  return Idx;
}

bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             const DataLayout &DL, bool IsStore,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
                                                      const DataLayout &DL,
                                                      Value *Addr,
                                                      unsigned Alignment) {
  // Do nothing.
  return true; // Return true to avoid slowpath instrumentation.
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this.
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application.  Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set.  We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}