//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines type-erased base types and functions for building dataflow
// analyses that run over Control-Flow Graphs (CFGs).
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>

#include "clang/AST/DeclCXX.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {
namespace dataflow {

class StmtToEnvMapImpl : public StmtToEnvMap {
public:
  StmtToEnvMapImpl(
      const ControlFlowContext &CFCtx,
      llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
          BlockToState)
      : CFCtx(CFCtx), BlockToState(BlockToState) {}

  const Environment *getEnvironment(const Stmt &S) const override {
    auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
    assert(BlockIt != CFCtx.getStmtToBlock().end());
    const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
    assert(State);
    return &State.value().Env;
  }

private:
  const ControlFlowContext &CFCtx;
  llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
};

/// Returns the index of `Block` in the successors of `Pred`.
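/// For example, if `Pred` terminates in `if (C)` and `Block` is the successor
/// reached when `C` is false, the returned index is 1, since the "else"
/// successor is the second successor of the terminator.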
static int blockIndexInPredecessor(const CFGBlock &Pred,
                                   const CFGBlock &Block) {
  auto BlockPos = llvm::find_if(
      Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
        return Succ && Succ->getBlockID() == Block.getBlockID();
      });
  return BlockPos - Pred.succ_begin();
}

/// Extends the flow condition of an environment based on a terminator
/// statement.
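///
/// Concretely, the branch condition (the left-hand operand for `&&` and `||`
/// terminators) is added to the flow condition as-is for the successor at
/// index 0 and negated for the successor at index 1. This is what makes the
/// flow conditions of the two outgoing edges mutually exclusive.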
class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> {
public:
  TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
                    int BlockSuccIdx, TransferOptions TransferOpts)
      : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx),
        TransferOpts(TransferOpts) {}

  void VisitIfStmt(const IfStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitWhileStmt(const WhileStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitDoStmt(const DoStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitForStmt(const ForStmt *S) {
    auto *Cond = S->getCond();
    if (Cond != nullptr)
      extendFlowCondition(*Cond);
  }

  void VisitBinaryOperator(const BinaryOperator *S) {
    assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
    auto *LHS = S->getLHS();
    assert(LHS != nullptr);
    extendFlowCondition(*LHS);
  }

  void VisitConditionalOperator(const ConditionalOperator *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

private:
  void extendFlowCondition(const Expr &Cond) {
    // The terminator sub-expression might not be evaluated.
    if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
      transfer(StmtToEnv, Cond, Env, TransferOpts);

    // FIXME: The flow condition must be an r-value, so `SkipPast::None` should
    // suffice.
    auto *Val =
        cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
    // Value merging depends on flow conditions from different environments
    // being mutually exclusive -- that is, they cannot both be true in their
    // entirety (even if they may share some clauses). So, we need *some* value
    // for the condition expression, even if just an atom.
    if (Val == nullptr) {
      // FIXME: Consider introducing a helper for this get-or-create pattern.
      auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
      if (Loc == nullptr) {
        Loc = &Env.createStorageLocation(Cond);
        Env.setStorageLocation(Cond, *Loc);
      }
      Val = &Env.makeAtomicBoolValue();
      Env.setValue(*Loc, *Val);
    }

    // The condition must be inverted for the successor that encompasses the
    // "else" branch, if such exists.
    if (BlockSuccIdx == 1)
      Val = &Env.makeNot(*Val);

    Env.addToFlowCondition(*Val);
  }

  const StmtToEnvMap &StmtToEnv;
  Environment &Env;
  int BlockSuccIdx;
  TransferOptions TransferOpts;
};

/// Computes the input state for a given basic block by joining the output
/// states of its predecessors.
///
/// Requirements:
///
/// All predecessors of `Block` except those with loop back edges must have
/// already been transferred. States in `BlockStates` that are set to
/// `llvm::None` represent basic blocks that are not evaluated yet.
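///
/// Conceptually, the result is the join, over each evaluated and reachable
/// predecessor `P`, of `P`'s output state with its flow condition extended
/// for the edge from `P` to `Block` (when the built-in transfer functions are
/// enabled). If no such predecessor exists, the result is the analysis'
/// initial element paired with `InitEnv`.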
static TypeErasedDataflowAnalysisState computeBlockInputState(
    const ControlFlowContext &CFCtx,
    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
    const CFGBlock &Block, const Environment &InitEnv,
    TypeErasedDataflowAnalysis &Analysis) {
  llvm::DenseSet<const CFGBlock *> Preds;
  Preds.insert(Block.pred_begin(), Block.pred_end());
  if (Block.getTerminator().isTemporaryDtorsBranch()) {
    // This handles a special case where the code that produced the CFG includes
    // a conditional operator with a branch that constructs a temporary and
    // calls a destructor annotated as noreturn. The CFG models this as follows:
    //
    // B1 (contains the condition of the conditional operator) - succs: B2, B3
    // B2 (contains code that does not call a noreturn destructor) - succs: B4
    // B3 (contains code that calls a noreturn destructor) - succs: B4
    // B4 (has temporary destructor terminator) - succs: B5, B6
    // B5 (noreturn block that is associated with the noreturn destructor call)
    // B6 (contains code that follows the conditional operator statement)
    //
    // The first successor (B5 above) of a basic block with a temporary
    // destructor terminator (B4 above) is the block that evaluates the
    // destructor. If that block has a noreturn element then the predecessor
    // block that constructed the temporary object (B3 above) is effectively a
    // noreturn block and its state should not be used as input for the state
    // of the block that has a temporary destructor terminator (B4 above). This
    // holds regardless of which branch of the ternary operator calls the
    // noreturn destructor. However, it doesn't handle cases where a nested
    // ternary operator includes a branch that contains a noreturn destructor
    // call.
    //
    // See `NoreturnDestructorTest` for concrete examples.
    if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
      auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt());
      assert(StmtBlock != CFCtx.getStmtToBlock().end());
      Preds.erase(StmtBlock->getSecond());
    }
  }

  llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState;
  bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer();

  for (const CFGBlock *Pred : Preds) {
    // Skip if `Pred` is unreachable or control flow cannot get past it.
    if (!Pred || Pred->hasNoReturnElement())
      continue;

    // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
    // loop back edge to `Block`.
    const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
        BlockStates[Pred->getBlockID()];
    if (!MaybePredState)
      continue;

    TypeErasedDataflowAnalysisState PredState = MaybePredState.value();
    if (ApplyBuiltinTransfer) {
      if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
        const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates);
        TerminatorVisitor(StmtToEnv, PredState.Env,
                          blockIndexInPredecessor(*Pred, Block),
                          Analysis.builtinTransferOptions())
            .Visit(PredTerminatorStmt);
      }
    }

    if (MaybeState) {
      Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
      MaybeState->Env.join(PredState.Env, Analysis);
    } else {
      MaybeState = std::move(PredState);
    }
  }
  if (!MaybeState) {
    // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
    // to enable building analyses like computation of dominators that
    // initialize the state of each basic block differently.
    MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv);
  }
  return *MaybeState;
}

/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt` after it
/// is evaluated.
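/// Note that the built-in transfer (when enabled) runs on the statement before
/// the analysis-specific transfer function, so the latter sees an environment
/// that already reflects the statement's effects.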
static void transferCFGStmt(
    const ControlFlowContext &CFCtx,
    llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
    const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
    TypeErasedDataflowAnalysisState &State,
    std::function<void(const CFGStmt &,
                       const TypeErasedDataflowAnalysisState &)>
        HandleTransferredStmt) {
  const Stmt *S = CfgStmt.getStmt();
  assert(S != nullptr);

  if (Analysis.applyBuiltinTransfer())
    transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env,
             Analysis.builtinTransferOptions());
  Analysis.transferTypeErased(S, State.Lattice, State.Env);

  if (HandleTransferredStmt != nullptr)
    HandleTransferredStmt(CfgStmt, State);
}

/// Transfers `State` by evaluating `CfgInit`.
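/// Specifically, for a field initializer in a constructor's initializer list,
/// the value of the initializing expression is written to the field's storage
/// location within the object pointed to by `this`; for reference-typed
/// fields, a `ReferenceValue` pointing at the initializer's storage location
/// is stored instead. Non-field initializers (e.g. base class initializers)
/// are skipped.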
static void transferCFGInitializer(const CFGInitializer &CfgInit,
                                   TypeErasedDataflowAnalysisState &State) {
  const auto &ThisLoc = *cast<AggregateStorageLocation>(
      State.Env.getThisPointeeStorageLocation());

  const CXXCtorInitializer *Initializer = CfgInit.getInitializer();
  assert(Initializer != nullptr);

  const FieldDecl *Member = Initializer->getMember();
  if (Member == nullptr)
    // Not a field initializer.
    return;

  auto *InitStmt = Initializer->getInit();
  assert(InitStmt != nullptr);

  auto *InitStmtLoc =
      State.Env.getStorageLocation(*InitStmt, SkipPast::Reference);
  if (InitStmtLoc == nullptr)
    return;

  auto *InitStmtVal = State.Env.getValue(*InitStmtLoc);
  if (InitStmtVal == nullptr)
    return;

  if (Member->getType()->isReferenceType()) {
    auto &MemberLoc = ThisLoc.getChild(*Member);
    State.Env.setValue(MemberLoc,
                       State.Env.takeOwnership(
                           std::make_unique<ReferenceValue>(*InitStmtLoc)));
  } else {
    auto &MemberLoc = ThisLoc.getChild(*Member);
    State.Env.setValue(MemberLoc, *InitStmtVal);
  }
}

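// `transferBlock` is declared in TypeErasedDataflowAnalysis.h. It computes
// `Block`'s input state from its predecessors and then applies the transfer
// functions to each element of `Block` in order, returning the resulting
// output state.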
TypeErasedDataflowAnalysisState transferBlock(
    const ControlFlowContext &CFCtx,
    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
    const CFGBlock &Block, const Environment &InitEnv,
    TypeErasedDataflowAnalysis &Analysis,
    std::function<void(const CFGStmt &,
                       const TypeErasedDataflowAnalysisState &)>
        HandleTransferredStmt) {
  TypeErasedDataflowAnalysisState State =
      computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis);
  for (const CFGElement &Element : Block) {
    switch (Element.getKind()) {
    case CFGElement::Statement:
      transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
                      State, HandleTransferredStmt);
      break;
    case CFGElement::Initializer:
      if (Analysis.applyBuiltinTransfer())
        transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
      break;
    default:
      // FIXME: Evaluate other kinds of `CFGElement`.
      break;
    }
  }
  return State;
}

llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
    const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
    const Environment &InitEnv,
    std::function<void(const Stmt *, const TypeErasedDataflowAnalysisState &)>
        PostVisitStmt) {
  PostOrderCFGView POV(&CFCtx.getCFG());
  ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
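  // Note: `ForwardDataflowWorklist` dequeues blocks in reverse post-order, so
  // in the common case a block's predecessors are processed before the block
  // itself, which helps the analysis converge in fewer iterations.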

  std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
  BlockStates.resize(CFCtx.getCFG().size(), llvm::None);

  // The entry basic block doesn't contain statements so it can be skipped.
  const CFGBlock &Entry = CFCtx.getCFG().getEntry();
  BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
                                     InitEnv};
  Worklist.enqueueSuccessors(&Entry);

  // Bugs in lattices and transfer functions can prevent the analysis from
  // converging. To limit the damage (infinite loops) that these bugs can cause,
  // limit the number of iterations.
  // FIXME: Consider making the maximum number of iterations configurable.
  // FIXME: Consider restricting the number of backedges followed, rather than
  // iterations.
  // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
  // of iterations, number of functions that time out, etc.
  static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
  static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
  const uint32_t RelativeMaxIterations =
      MaxAverageVisitsPerBlock * BlockStates.size();
  const uint32_t MaxIterations =
      std::min(RelativeMaxIterations, AbsoluteMaxIterations);
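  // For example, a function whose CFG has 100 blocks is limited to
  // std::min(4 * 100, 65536) = 400 block visits in total.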
  uint32_t Iterations = 0;
  while (const CFGBlock *Block = Worklist.dequeue()) {
    if (++Iterations > MaxIterations) {
      return llvm::createStringError(std::errc::timed_out,
                                     "maximum number of iterations reached");
    }

    const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
        BlockStates[Block->getBlockID()];
    TypeErasedDataflowAnalysisState NewBlockState =
        transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis);

    if (OldBlockState &&
        Analysis.isEqualTypeErased(OldBlockState.value().Lattice,
                                   NewBlockState.Lattice) &&
        OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
      // The state of `Block` didn't change after transfer so there's no need to
      // revisit its successors.
      continue;
    }

    BlockStates[Block->getBlockID()] = std::move(NewBlockState);

    // Do not add unreachable successor blocks to `Worklist`.
    if (Block->hasNoReturnElement())
      continue;

    Worklist.enqueueSuccessors(Block);
  }
  // FIXME: Consider evaluating unreachable basic blocks (those that have a
  // state set to `llvm::None` at this point) to also analyze dead code.

  if (PostVisitStmt) {
    for (const CFGBlock *Block : CFCtx.getCFG()) {
      // Skip blocks that were not evaluated.
      if (!BlockStates[Block->getBlockID()])
        continue;
      transferBlock(
          CFCtx, BlockStates, *Block, InitEnv, Analysis,
          [&PostVisitStmt](const clang::CFGStmt &Stmt,
                           const TypeErasedDataflowAnalysisState &State) {
            PostVisitStmt(Stmt.getStmt(), State);
          });
    }
  }

  return BlockStates;
}

} // namespace dataflow
} // namespace clang