1 //===-- ThreadSanitizer.cpp - race detector -------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a part of ThreadSanitizer, a race detector. 11 // 12 // The tool is under development, for the details about previous versions see 13 // http://code.google.com/p/data-race-test 14 // 15 // The instrumentation phase is quite simple: 16 // - Insert calls to run-time library before every memory access. 17 // - Optimizations may apply to avoid instrumenting some of the accesses. 18 // - Insert calls at function entry/exit. 19 // The rest is handled by the run-time library. 20 //===----------------------------------------------------------------------===// 21 22 #define DEBUG_TYPE "tsan" 23 24 #include "FunctionBlackList.h" 25 #include "llvm/ADT/SmallSet.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/ADT/StringExtras.h" 30 #include "llvm/Intrinsics.h" 31 #include "llvm/Function.h" 32 #include "llvm/LLVMContext.h" 33 #include "llvm/Metadata.h" 34 #include "llvm/Module.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/IRBuilder.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include "llvm/Target/TargetData.h" 41 #include "llvm/Transforms/Instrumentation.h" 42 #include "llvm/Transforms/Utils/ModuleUtils.h" 43 #include "llvm/Type.h" 44 45 using namespace llvm; 46 47 static cl::opt<std::string> ClBlackListFile("tsan-blacklist", 48 cl::desc("Blacklist file"), cl::Hidden); 49 50 STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 51 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 52 STATISTIC(NumOmittedReadsBeforeWrite, 53 "Number of reads ignored due to following writes"); 54 STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); 55 STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); 56 STATISTIC(NumOmittedReadsFromConstantGlobals, 57 "Number of reads from constant globals"); 58 STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); 59 60 namespace { 61 62 /// ThreadSanitizer: instrument the code in module to find races. 63 struct ThreadSanitizer : public FunctionPass { 64 ThreadSanitizer(); 65 bool runOnFunction(Function &F); 66 bool doInitialization(Module &M); 67 bool instrumentLoadOrStore(Instruction *I); 68 static char ID; // Pass identification, replacement for typeid. 69 70 private: 71 void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, 72 SmallVectorImpl<Instruction*> &All); 73 bool addrPointsToConstantData(Value *Addr); 74 75 TargetData *TD; 76 OwningPtr<FunctionBlackList> BL; 77 // Callbacks to run-time library are computed in doInitialization. 78 Value *TsanFuncEntry; 79 Value *TsanFuncExit; 80 // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 81 static const size_t kNumberOfAccessSizes = 5; 82 Value *TsanRead[kNumberOfAccessSizes]; 83 Value *TsanWrite[kNumberOfAccessSizes]; 84 Value *TsanVptrUpdate; 85 }; 86 } // namespace 87 88 char ThreadSanitizer::ID = 0; 89 INITIALIZE_PASS(ThreadSanitizer, "tsan", 90 "ThreadSanitizer: detects data races.", 91 false, false) 92 93 ThreadSanitizer::ThreadSanitizer() 94 : FunctionPass(ID), 95 TD(NULL) { 96 } 97 98 FunctionPass *llvm::createThreadSanitizerPass() { 99 return new ThreadSanitizer(); 100 } 101 102 bool ThreadSanitizer::doInitialization(Module &M) { 103 TD = getAnalysisIfAvailable<TargetData>(); 104 if (!TD) 105 return false; 106 BL.reset(new FunctionBlackList(ClBlackListFile)); 107 108 // Always insert a call to __tsan_init into the module's CTORs. 109 IRBuilder<> IRB(M.getContext()); 110 Value *TsanInit = M.getOrInsertFunction("__tsan_init", 111 IRB.getVoidTy(), NULL); 112 appendToGlobalCtors(M, cast<Function>(TsanInit), 0); 113 114 // Initialize the callbacks. 115 TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(), 116 IRB.getInt8PtrTy(), NULL); 117 TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), 118 NULL); 119 for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { 120 SmallString<32> ReadName("__tsan_read"); 121 ReadName += itostr(1 << i); 122 TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(), 123 IRB.getInt8PtrTy(), NULL); 124 SmallString<32> WriteName("__tsan_write"); 125 WriteName += itostr(1 << i); 126 TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), 127 IRB.getInt8PtrTy(), NULL); 128 } 129 TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), 130 IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 131 NULL); 132 return true; 133 } 134 135 static bool isVtableAccess(Instruction *I) { 136 if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { 137 if (Tag->getNumOperands() < 1) return false; 138 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 139 if (Tag1->getString() == "vtable pointer") return true; 140 } 141 } 142 return false; 143 } 144 145 bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { 146 // If this is a GEP, just analyze its pointer operand. 147 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) 148 Addr = GEP->getPointerOperand(); 149 150 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 151 if (GV->isConstant()) { 152 // Reads from constant globals can not race with any writes. 153 NumOmittedReadsFromConstantGlobals++; 154 return true; 155 } 156 } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { 157 if (isVtableAccess(L)) { 158 // Reads from a vtable pointer can not race with any writes. 159 NumOmittedReadsFromVtable++; 160 return true; 161 } 162 } 163 return false; 164 } 165 166 // Instrumenting some of the accesses may be proven redundant. 167 // Currently handled: 168 // - read-before-write (within same BB, no calls between) 169 // 170 // We do not handle some of the patterns that should not survive 171 // after the classic compiler optimizations. 172 // E.g. two reads from the same temp should be eliminated by CSE, 173 // two writes should be eliminated by DSE, etc. 174 // 175 // 'Local' is a vector of insns within the same BB (no calls between). 176 // 'All' is a vector of insns that will be instrumented. 177 void ThreadSanitizer::choseInstructionsToInstrument( 178 SmallVectorImpl<Instruction*> &Local, 179 SmallVectorImpl<Instruction*> &All) { 180 SmallSet<Value*, 8> WriteTargets; 181 // Iterate from the end. 182 for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), 183 E = Local.rend(); It != E; ++It) { 184 Instruction *I = *It; 185 if (StoreInst *Store = dyn_cast<StoreInst>(I)) { 186 WriteTargets.insert(Store->getPointerOperand()); 187 } else { 188 LoadInst *Load = cast<LoadInst>(I); 189 Value *Addr = Load->getPointerOperand(); 190 if (WriteTargets.count(Addr)) { 191 // We will write to this temp, so no reason to analyze the read. 192 NumOmittedReadsBeforeWrite++; 193 continue; 194 } 195 if (addrPointsToConstantData(Addr)) { 196 // Addr points to some constant data -- it can not race with any writes. 197 continue; 198 } 199 } 200 All.push_back(I); 201 } 202 Local.clear(); 203 } 204 205 bool ThreadSanitizer::runOnFunction(Function &F) { 206 if (!TD) return false; 207 if (BL->isIn(F)) return false; 208 SmallVector<Instruction*, 8> RetVec; 209 SmallVector<Instruction*, 8> AllLoadsAndStores; 210 SmallVector<Instruction*, 8> LocalLoadsAndStores; 211 bool Res = false; 212 bool HasCalls = false; 213 214 // Traverse all instructions, collect loads/stores/returns, check for calls. 215 for (Function::iterator FI = F.begin(), FE = F.end(); 216 FI != FE; ++FI) { 217 BasicBlock &BB = *FI; 218 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); 219 BI != BE; ++BI) { 220 if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) 221 LocalLoadsAndStores.push_back(BI); 222 else if (isa<ReturnInst>(BI)) 223 RetVec.push_back(BI); 224 else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 225 HasCalls = true; 226 choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); 227 } 228 } 229 choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); 230 } 231 232 // We have collected all loads and stores. 233 // FIXME: many of these accesses do not need to be checked for races 234 // (e.g. variables that do not escape, etc). 235 236 // Instrument memory accesses. 237 for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { 238 Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); 239 } 240 241 // Instrument function entry/exit points if there were instrumented accesses. 242 if (Res || HasCalls) { 243 IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); 244 Value *ReturnAddress = IRB.CreateCall( 245 Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), 246 IRB.getInt32(0)); 247 IRB.CreateCall(TsanFuncEntry, ReturnAddress); 248 for (size_t i = 0, n = RetVec.size(); i < n; ++i) { 249 IRBuilder<> IRBRet(RetVec[i]); 250 IRBRet.CreateCall(TsanFuncExit); 251 } 252 Res = true; 253 } 254 return Res; 255 } 256 257 bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { 258 IRBuilder<> IRB(I); 259 bool IsWrite = isa<StoreInst>(*I); 260 Value *Addr = IsWrite 261 ? cast<StoreInst>(I)->getPointerOperand() 262 : cast<LoadInst>(I)->getPointerOperand(); 263 Type *OrigPtrTy = Addr->getType(); 264 Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); 265 assert(OrigTy->isSized()); 266 uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); 267 if (TypeSize != 8 && TypeSize != 16 && 268 TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { 269 NumAccessesWithBadSize++; 270 // Ignore all unusual sizes. 271 return false; 272 } 273 if (IsWrite && isVtableAccess(I)) { 274 Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); 275 IRB.CreateCall2(TsanVptrUpdate, 276 IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 277 IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); 278 NumInstrumentedVtableWrites++; 279 return true; 280 } 281 size_t Idx = CountTrailingZeros_32(TypeSize / 8); 282 assert(Idx < kNumberOfAccessSizes); 283 Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; 284 IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); 285 if (IsWrite) NumInstrumentedWrites++; 286 else NumInstrumentedReads++; 287 return true; 288 } 289