1 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains routines that help determine which pointers are captured. 11 // A pointer value is captured if the function makes a copy of any part of the 12 // pointer that outlives the call. Not being captured means, more or less, that 13 // the pointer is only dereferenced and not stored in a global. Returning part 14 // of the pointer as the function return value may or may not count as capturing 15 // the pointer, depending on the context. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/Analysis/CaptureTracking.h" 24 #include "llvm/IR/CallSite.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 29 using namespace llvm; 30 31 CaptureTracker::~CaptureTracker() {} 32 33 bool CaptureTracker::shouldExplore(const Use *U) { return true; } 34 35 namespace { 36 struct SimpleCaptureTracker : public CaptureTracker { 37 explicit SimpleCaptureTracker(bool ReturnCaptures) 38 : ReturnCaptures(ReturnCaptures), Captured(false) {} 39 40 void tooManyUses() override { Captured = true; } 41 42 bool captured(const Use *U) override { 43 if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) 44 return false; 45 46 Captured = true; 47 return true; 48 } 49 50 bool ReturnCaptures; 51 52 bool Captured; 53 }; 54 55 struct NumberedInstCache { 56 SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; 57 BasicBlock::const_iterator LastInstFound; 58 unsigned LastInstPos; 59 const BasicBlock *BB; 60 61 NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) { 62 LastInstFound = BB->end(); 63 } 64 65 /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out 66 /// instruction while walking 'BB'. 67 const Instruction *find(const Instruction *A, const Instruction *B) { 68 const Instruction *Inst = nullptr; 69 assert(!(LastInstFound == BB->end() && LastInstPos != 0) && 70 "Instruction supposed to be in NumberedInsts"); 71 72 // Start the search with the instruction found in the last lookup round. 73 auto II = BB->begin(); 74 auto IE = BB->end(); 75 if (LastInstFound != IE) 76 II = std::next(LastInstFound); 77 78 // Number all instructions up to the point where we find 'A' or 'B'. 79 for (++LastInstPos; II != IE; ++II, ++LastInstPos) { 80 Inst = cast<Instruction>(II); 81 NumberedInsts[Inst] = LastInstPos; 82 if (Inst == A || Inst == B) 83 break; 84 } 85 86 assert(II != IE && "Instruction not found?"); 87 LastInstFound = II; 88 return Inst; 89 } 90 91 /// \brief Find out whether 'A' dominates 'B', meaning whether 'A' 92 /// comes before 'B' in 'BB'. This is a simplification that considers 93 /// cached instruction positions and ignores other basic blocks, being 94 /// only relevant to compare relative instructions positions inside 'BB'. 95 bool dominates(const Instruction *A, const Instruction *B) { 96 assert(A->getParent() == B->getParent() && 97 "Instructions must be in the same basic block!"); 98 99 unsigned NA = NumberedInsts.lookup(A); 100 unsigned NB = NumberedInsts.lookup(B); 101 if (NA && NB) 102 return NA < NB; 103 if (NA) 104 return true; 105 if (NB) 106 return false; 107 108 return A == find(A, B); 109 } 110 }; 111 112 /// Only find pointer captures which happen before the given instruction. Uses 113 /// the dominator tree to determine whether one instruction is before another. 114 /// Only support the case where the Value is defined in the same basic block 115 /// as the given instruction and the use. 116 struct CapturesBefore : public CaptureTracker { 117 118 CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, 119 bool IncludeI) 120 : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT), 121 ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} 122 123 void tooManyUses() override { Captured = true; } 124 125 bool isSafeToPrune(Instruction *I) { 126 BasicBlock *BB = I->getParent(); 127 // We explore this usage only if the usage can reach "BeforeHere". 128 // If use is not reachable from entry, there is no need to explore. 129 if (BeforeHere != I && !DT->isReachableFromEntry(BB)) 130 return true; 131 132 // Compute the case where both instructions are inside the same basic 133 // block. Since instructions in the same BB as BeforeHere are numbered in 134 // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable' 135 // which are very expensive for large basic blocks. 136 if (BB == BeforeHere->getParent()) { 137 // 'I' dominates 'BeforeHere' => not safe to prune. 138 // 139 // The value defined by an invoke dominates an instruction only if it 140 // dominates every instruction in UseBB. A PHI is dominated only if 141 // the instruction dominates every possible use in the UseBB. Since 142 // UseBB == BB, avoid pruning. 143 if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere) 144 return false; 145 if (!LocalInstCache.dominates(BeforeHere, I)) 146 return false; 147 148 // 'BeforeHere' comes before 'I', it's safe to prune if we also 149 // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or 150 // by its successors, i.e, prune if: 151 // 152 // (1) BB is an entry block or have no sucessors. 153 // (2) There's no path coming back through BB sucessors. 154 if (BB == &BB->getParent()->getEntryBlock() || 155 !BB->getTerminator()->getNumSuccessors()) 156 return true; 157 158 SmallVector<BasicBlock*, 32> Worklist; 159 Worklist.append(succ_begin(BB), succ_end(BB)); 160 if (!isPotentiallyReachableFromMany(Worklist, BB, DT)) 161 return true; 162 163 return false; 164 } 165 166 // If the value is defined in the same basic block as use and BeforeHere, 167 // there is no need to explore the use if BeforeHere dominates use. 168 // Check whether there is a path from I to BeforeHere. 169 if (BeforeHere != I && DT->dominates(BeforeHere, I) && 170 !isPotentiallyReachable(I, BeforeHere, DT)) 171 return true; 172 173 return false; 174 } 175 176 bool shouldExplore(const Use *U) override { 177 Instruction *I = cast<Instruction>(U->getUser()); 178 179 if (BeforeHere == I && !IncludeI) 180 return false; 181 182 if (isSafeToPrune(I)) 183 return false; 184 185 return true; 186 } 187 188 bool captured(const Use *U) override { 189 if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) 190 return false; 191 192 if (!shouldExplore(U)) 193 return false; 194 195 Captured = true; 196 return true; 197 } 198 199 NumberedInstCache LocalInstCache; 200 const Instruction *BeforeHere; 201 DominatorTree *DT; 202 203 bool ReturnCaptures; 204 bool IncludeI; 205 206 bool Captured; 207 }; 208 } 209 210 /// PointerMayBeCaptured - Return true if this pointer value may be captured 211 /// by the enclosing function (which is required to exist). This routine can 212 /// be expensive, so consider caching the results. The boolean ReturnCaptures 213 /// specifies whether returning the value (or part of it) from the function 214 /// counts as capturing it or not. The boolean StoreCaptures specified whether 215 /// storing the value (or part of it) into memory anywhere automatically 216 /// counts as capturing it or not. 217 bool llvm::PointerMayBeCaptured(const Value *V, 218 bool ReturnCaptures, bool StoreCaptures) { 219 assert(!isa<GlobalValue>(V) && 220 "It doesn't make sense to ask whether a global is captured."); 221 222 // TODO: If StoreCaptures is not true, we could do Fancy analysis 223 // to determine whether this store is not actually an escape point. 224 // In that case, BasicAliasAnalysis should be updated as well to 225 // take advantage of this. 226 (void)StoreCaptures; 227 228 SimpleCaptureTracker SCT(ReturnCaptures); 229 PointerMayBeCaptured(V, &SCT); 230 return SCT.Captured; 231 } 232 233 /// PointerMayBeCapturedBefore - Return true if this pointer value may be 234 /// captured by the enclosing function (which is required to exist). If a 235 /// DominatorTree is provided, only captures which happen before the given 236 /// instruction are considered. This routine can be expensive, so consider 237 /// caching the results. The boolean ReturnCaptures specifies whether 238 /// returning the value (or part of it) from the function counts as capturing 239 /// it or not. The boolean StoreCaptures specified whether storing the value 240 /// (or part of it) into memory anywhere automatically counts as capturing it 241 /// or not. 242 bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, 243 bool StoreCaptures, const Instruction *I, 244 DominatorTree *DT, bool IncludeI) { 245 assert(!isa<GlobalValue>(V) && 246 "It doesn't make sense to ask whether a global is captured."); 247 248 if (!DT) 249 return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); 250 251 // TODO: See comment in PointerMayBeCaptured regarding what could be done 252 // with StoreCaptures. 253 254 CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); 255 PointerMayBeCaptured(V, &CB); 256 return CB.Captured; 257 } 258 259 /// TODO: Write a new FunctionPass AliasAnalysis so that it can keep 260 /// a cache. Then we can move the code from BasicAliasAnalysis into 261 /// that path, and remove this threshold. 262 static int const Threshold = 20; 263 264 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { 265 assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); 266 SmallVector<const Use *, Threshold> Worklist; 267 SmallSet<const Use *, Threshold> Visited; 268 int Count = 0; 269 270 for (const Use &U : V->uses()) { 271 // If there are lots of uses, conservatively say that the value 272 // is captured to avoid taking too much compile time. 273 if (Count++ >= Threshold) 274 return Tracker->tooManyUses(); 275 276 if (!Tracker->shouldExplore(&U)) continue; 277 Visited.insert(&U); 278 Worklist.push_back(&U); 279 } 280 281 while (!Worklist.empty()) { 282 const Use *U = Worklist.pop_back_val(); 283 Instruction *I = cast<Instruction>(U->getUser()); 284 V = U->get(); 285 286 switch (I->getOpcode()) { 287 case Instruction::Call: 288 case Instruction::Invoke: { 289 CallSite CS(I); 290 // Not captured if the callee is readonly, doesn't return a copy through 291 // its return value and doesn't unwind (a readonly function can leak bits 292 // by throwing an exception or not depending on the input value). 293 if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) 294 break; 295 296 // Not captured if only passed via 'nocapture' arguments. Note that 297 // calling a function pointer does not in itself cause the pointer to 298 // be captured. This is a subtle point considering that (for example) 299 // the callee might return its own address. It is analogous to saying 300 // that loading a value from a pointer does not cause the pointer to be 301 // captured, even though the loaded value might be the pointer itself 302 // (think of self-referential objects). 303 CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); 304 for (CallSite::arg_iterator A = B; A != E; ++A) 305 if (A->get() == V && !CS.doesNotCapture(A - B)) 306 // The parameter is not marked 'nocapture' - captured. 307 if (Tracker->captured(U)) 308 return; 309 break; 310 } 311 case Instruction::Load: 312 // Loading from a pointer does not cause it to be captured. 313 break; 314 case Instruction::VAArg: 315 // "va-arg" from a pointer does not cause it to be captured. 316 break; 317 case Instruction::Store: 318 if (V == I->getOperand(0)) 319 // Stored the pointer - conservatively assume it may be captured. 320 if (Tracker->captured(U)) 321 return; 322 // Storing to the pointee does not cause the pointer to be captured. 323 break; 324 case Instruction::BitCast: 325 case Instruction::GetElementPtr: 326 case Instruction::PHI: 327 case Instruction::Select: 328 case Instruction::AddrSpaceCast: 329 // The original value is not captured via this if the new value isn't. 330 Count = 0; 331 for (Use &UU : I->uses()) { 332 // If there are lots of uses, conservatively say that the value 333 // is captured to avoid taking too much compile time. 334 if (Count++ >= Threshold) 335 return Tracker->tooManyUses(); 336 337 if (Visited.insert(&UU).second) 338 if (Tracker->shouldExplore(&UU)) 339 Worklist.push_back(&UU); 340 } 341 break; 342 case Instruction::ICmp: 343 // Don't count comparisons of a no-alias return value against null as 344 // captures. This allows us to ignore comparisons of malloc results 345 // with null, for example. 346 if (ConstantPointerNull *CPN = 347 dyn_cast<ConstantPointerNull>(I->getOperand(1))) 348 if (CPN->getType()->getAddressSpace() == 0) 349 if (isNoAliasCall(V->stripPointerCasts())) 350 break; 351 // Otherwise, be conservative. There are crazy ways to capture pointers 352 // using comparisons. 353 if (Tracker->captured(U)) 354 return; 355 break; 356 default: 357 // Something else - be conservative and say it is captured. 358 if (Tracker->captured(U)) 359 return; 360 break; 361 } 362 } 363 364 // All uses examined. 365 } 366