1 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help determine which pointers are captured.
10 // A pointer value is captured if the function makes a copy of any part of the
11 // pointer that outlives the call.  Not being captured means, more or less, that
12 // the pointer is only dereferenced and not stored in a global.  Returning part
13 // of the pointer as the function return value may or may not count as capturing
14 // the pointer, depending on the context.
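//
// For example (an illustrative sketch only; the names are hypothetical):
//
//   static int *G;
//   void f(int *P) { G = P; }      // P is captured: stored to a global.
//   int  g(int *P) { return *P; }  // P is not captured: only dereferenced.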
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Analysis/CaptureTracking.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/CFG.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 
29 using namespace llvm;
30 
31 CaptureTracker::~CaptureTracker() {}
32 
33 bool CaptureTracker::shouldExplore(const Use *U) { return true; }
34 
35 bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
36   // An inbounds GEP can either be a valid pointer (pointing into
37   // or to the end of an allocation), or be null in the default
38   // address space. So for an inbounds GEP there is no way to let
39   // the pointer escape using clever GEP hacking because doing so
40   // would make the pointer point outside of the allocated object
41   // and thus make the GEP result a poison value. Similarly, other
42   // dereferenceable pointers cannot be manipulated without producing
43   // poison.
44   if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
45     if (GEP->isInBounds())
46       return true;
47   bool CanBeNull;
48   return O->getPointerDereferenceableBytes(DL, CanBeNull);
49 }
50 
51 namespace {
52   struct SimpleCaptureTracker : public CaptureTracker {
53     explicit SimpleCaptureTracker(bool ReturnCaptures)
54       : ReturnCaptures(ReturnCaptures), Captured(false) {}
55 
56     void tooManyUses() override { Captured = true; }
57 
58     bool captured(const Use *U) override {
59       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
60         return false;
61 
62       Captured = true;
63       return true;
64     }
65 
66     bool ReturnCaptures;
67 
68     bool Captured;
69   };
70 
71   /// Only find pointer captures which happen before the given instruction. Uses
72   /// the dominator tree to determine whether one instruction is before another.
  /// Only supports the case where the Value is defined in the same basic block
  /// as the given instruction and the use.
75   struct CapturesBefore : public CaptureTracker {
76 
    CapturesBefore(bool ReturnCaptures, const Instruction *I,
                   const DominatorTree *DT, bool IncludeI)
      : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
        IncludeI(IncludeI), Captured(false) {}
81 
82     void tooManyUses() override { Captured = true; }
83 
84     bool isSafeToPrune(Instruction *I) {
85       BasicBlock *BB = I->getParent();
      // We explore this use only if it can reach "BeforeHere".
      // If the use is not reachable from the entry block, there is no need to
      // explore it.
88       if (BeforeHere != I && !DT->isReachableFromEntry(BB))
89         return true;
90 
      // Handle the case where both instructions are in the same basic block.
93       if (BB == BeforeHere->getParent()) {
94         // 'I' dominates 'BeforeHere' => not safe to prune.
95         //
96         // The value defined by an invoke dominates an instruction only
97         // if it dominates every instruction in UseBB. A PHI is dominated only
98         // if the instruction dominates every possible use in the UseBB. Since
99         // UseBB == BB, avoid pruning.
100         if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)
101           return false;
102         if (!BeforeHere->comesBefore(I))
103           return false;
104 
        // 'BeforeHere' comes before 'I'; it is safe to prune if we can also
        // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or
        // via its successors, i.e., prune if:
        //
        //  (1) BB is the entry block or has no successors, or
        //  (2) there is no path from BB's successors back to BB.
111         if (BB == &BB->getParent()->getEntryBlock() ||
112             !BB->getTerminator()->getNumSuccessors())
113           return true;
114 
115         SmallVector<BasicBlock*, 32> Worklist;
116         Worklist.append(succ_begin(BB), succ_end(BB));
117         return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT);
118       }
119 
      // If the value is defined in the same basic block as the use and
      // BeforeHere, there is no need to explore the use if BeforeHere
      // dominates the use. Also check that there is no path from I back to
      // BeforeHere.
123       if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
124           !isPotentiallyReachable(I, BeforeHere, nullptr, DT))
125         return true;
126 
127       return false;
128     }
129 
130     bool shouldExplore(const Use *U) override {
131       Instruction *I = cast<Instruction>(U->getUser());
132 
133       if (BeforeHere == I && !IncludeI)
134         return false;
135 
136       if (isSafeToPrune(I))
137         return false;
138 
139       return true;
140     }
141 
142     bool captured(const Use *U) override {
143       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
144         return false;
145 
146       if (!shouldExplore(U))
147         return false;
148 
149       Captured = true;
150       return true;
151     }
152 
153     const Instruction *BeforeHere;
154     const DominatorTree *DT;
155 
156     bool ReturnCaptures;
157     bool IncludeI;
158 
159     bool Captured;
160   };
161 }
162 
163 /// PointerMayBeCaptured - Return true if this pointer value may be captured
164 /// by the enclosing function (which is required to exist).  This routine can
165 /// be expensive, so consider caching the results.  The boolean ReturnCaptures
166 /// specifies whether returning the value (or part of it) from the function
/// counts as capturing it or not.  The boolean StoreCaptures specifies whether
168 /// storing the value (or part of it) into memory anywhere automatically
169 /// counts as capturing it or not.
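///
/// Example (an illustrative sketch; 'AI' stands for some pointer Value, such
/// as an alloca, defined in the function being analyzed; MaxUsesToExplore is
/// left at its default from CaptureTracking.h):
///
///   if (!PointerMayBeCaptured(AI, /*ReturnCaptures=*/true,
///                             /*StoreCaptures=*/true)) {
///     // No copy of AI outlives the function; it can be treated as local.
///   }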
170 bool llvm::PointerMayBeCaptured(const Value *V,
171                                 bool ReturnCaptures, bool StoreCaptures,
172                                 unsigned MaxUsesToExplore) {
173   assert(!isa<GlobalValue>(V) &&
174          "It doesn't make sense to ask whether a global is captured.");
175 
  // TODO: If StoreCaptures is not true, we could do fancy analysis
177   // to determine whether this store is not actually an escape point.
178   // In that case, BasicAliasAnalysis should be updated as well to
179   // take advantage of this.
180   (void)StoreCaptures;
181 
182   SimpleCaptureTracker SCT(ReturnCaptures);
183   PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
184   return SCT.Captured;
185 }
186 
187 /// PointerMayBeCapturedBefore - Return true if this pointer value may be
188 /// captured by the enclosing function (which is required to exist). If a
189 /// DominatorTree is provided, only captures which happen before the given
190 /// instruction are considered. This routine can be expensive, so consider
191 /// caching the results.  The boolean ReturnCaptures specifies whether
192 /// returning the value (or part of it) from the function counts as capturing
/// it or not.  The boolean StoreCaptures specifies whether storing the value
194 /// (or part of it) into memory anywhere automatically counts as capturing it
195 /// or not.
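///
/// Example (an illustrative sketch; 'AI' is some pointer Value, 'CI' an
/// instruction in the same function, and 'DT' a DominatorTree for it;
/// MaxUsesToExplore is left at its default from CaptureTracking.h):
///
///   if (!PointerMayBeCapturedBefore(AI, /*ReturnCaptures=*/true,
///                                   /*StoreCaptures=*/true, CI, &DT,
///                                   /*IncludeI=*/false)) {
///     // AI is not captured by anything that executes before CI.
///   }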
196 bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
197                                       bool StoreCaptures, const Instruction *I,
198                                       const DominatorTree *DT, bool IncludeI,
199                                       unsigned MaxUsesToExplore) {
200   assert(!isa<GlobalValue>(V) &&
201          "It doesn't make sense to ask whether a global is captured.");
202 
203   if (!DT)
204     return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
205                                 MaxUsesToExplore);
206 
207   // TODO: See comment in PointerMayBeCaptured regarding what could be done
208   // with StoreCaptures.
209 
210   CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
211   PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
212   return CB.Captured;
213 }
214 
215 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
216                                 unsigned MaxUsesToExplore) {
217   assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
218   SmallVector<const Use *, DefaultMaxUsesToExplore> Worklist;
219   SmallSet<const Use *, DefaultMaxUsesToExplore> Visited;
220 
221   auto AddUses = [&](const Value *V) {
222     unsigned Count = 0;
223     for (const Use &U : V->uses()) {
224       // If there are lots of uses, conservatively say that the value
225       // is captured to avoid taking too much compile time.
226       if (Count++ >= MaxUsesToExplore)
227         return Tracker->tooManyUses();
228       if (!Visited.insert(&U).second)
229         continue;
230       if (!Tracker->shouldExplore(&U))
231         continue;
232       Worklist.push_back(&U);
233     }
234   };
235   AddUses(V);
236 
237   while (!Worklist.empty()) {
238     const Use *U = Worklist.pop_back_val();
239     Instruction *I = cast<Instruction>(U->getUser());
240     V = U->get();
241 
242     switch (I->getOpcode()) {
243     case Instruction::Call:
244     case Instruction::Invoke: {
245       auto *Call = cast<CallBase>(I);
      // Not captured if the callee is readonly, doesn't return a copy through
      // its return value, and doesn't unwind (a readonly function can leak
      // bits by throwing an exception or not, depending on the input value).
249       if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
250           Call->getType()->isVoidTy())
251         break;
252 
      // The pointer is not captured if the returned pointer is not captured.
254       // NOTE: CaptureTracking users should not assume that only functions
255       // marked with nocapture do not capture. This means that places like
256       // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
257       // in BasicAA also need to know about this property.
258       if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
259                                                                       true)) {
260         AddUses(Call);
261         break;
262       }
263 
264       // Volatile operations effectively capture the memory location that they
265       // load and store to.
266       if (auto *MI = dyn_cast<MemIntrinsic>(Call))
267         if (MI->isVolatile())
268           if (Tracker->captured(U))
269             return;
270 
271       // Not captured if only passed via 'nocapture' arguments.  Note that
272       // calling a function pointer does not in itself cause the pointer to
273       // be captured.  This is a subtle point considering that (for example)
274       // the callee might return its own address.  It is analogous to saying
275       // that loading a value from a pointer does not cause the pointer to be
276       // captured, even though the loaded value might be the pointer itself
277       // (think of self-referential objects).
278       for (auto IdxOpPair : enumerate(Call->data_ops())) {
279         int Idx = IdxOpPair.index();
280         Value *A = IdxOpPair.value();
281         if (A == V && !Call->doesNotCapture(Idx))
282           // The parameter is not marked 'nocapture' - captured.
283           if (Tracker->captured(U))
284             return;
285       }
286       break;
287     }
288     case Instruction::Load:
289       // Volatile loads make the address observable.
290       if (cast<LoadInst>(I)->isVolatile())
291         if (Tracker->captured(U))
292           return;
293       break;
294     case Instruction::VAArg:
295       // "va-arg" from a pointer does not cause it to be captured.
296       break;
297     case Instruction::Store:
      // Stored the pointer - conservatively assume it may be captured.
      // Volatile stores make the address observable.
300       if (V == I->getOperand(0) || cast<StoreInst>(I)->isVolatile())
301         if (Tracker->captured(U))
302           return;
303       break;
304     case Instruction::AtomicRMW: {
305       // atomicrmw conceptually includes both a load and store from
306       // the same location.
307       // As with a store, the location being accessed is not captured,
308       // but the value being stored is.
309       // Volatile stores make the address observable.
310       auto *ARMWI = cast<AtomicRMWInst>(I);
311       if (ARMWI->getValOperand() == V || ARMWI->isVolatile())
312         if (Tracker->captured(U))
313           return;
314       break;
315     }
316     case Instruction::AtomicCmpXchg: {
317       // cmpxchg conceptually includes both a load and store from
318       // the same location.
319       // As with a store, the location being accessed is not captured,
320       // but the value being stored is.
321       // Volatile stores make the address observable.
322       auto *ACXI = cast<AtomicCmpXchgInst>(I);
323       if (ACXI->getCompareOperand() == V || ACXI->getNewValOperand() == V ||
324           ACXI->isVolatile())
325         if (Tracker->captured(U))
326           return;
327       break;
328     }
329     case Instruction::BitCast:
330     case Instruction::GetElementPtr:
331     case Instruction::PHI:
332     case Instruction::Select:
333     case Instruction::AddrSpaceCast:
334       // The original value is not captured via this if the new value isn't.
335       AddUses(I);
336       break;
337     case Instruction::ICmp: {
338       unsigned Idx = (I->getOperand(0) == V) ? 0 : 1;
339       unsigned OtherIdx = 1 - Idx;
340       if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
341         // Don't count comparisons of a no-alias return value against null as
342         // captures. This allows us to ignore comparisons of malloc results
343         // with null, for example.
344         if (CPN->getType()->getAddressSpace() == 0)
345           if (isNoAliasCall(V->stripPointerCasts()))
346             break;
347         if (!I->getFunction()->nullPointerIsDefined()) {
348           auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
349           // Comparing a dereferenceable_or_null pointer against null cannot
350           // lead to pointer escapes, because if it is not null it must be a
351           // valid (in-bounds) pointer.
352           if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout()))
353             break;
354         }
355       }
      // Comparison against a value loaded from a global variable. Since the
      // pointer does not escape, its value cannot have been guessed and stored
      // separately in a global variable.
359       auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
360       if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
361         break;
362       // Otherwise, be conservative. There are crazy ways to capture pointers
363       // using comparisons.
364       if (Tracker->captured(U))
365         return;
366       break;
367     }
368     default:
369       // Something else - be conservative and say it is captured.
370       if (Tracker->captured(U))
371         return;
372       break;
373     }
374   }
375 
376   // All uses examined.
377 }
378