//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines type-erased base types and functions for building dataflow // analyses that run over Control-Flow Graphs (CFGs). // //===----------------------------------------------------------------------===// #include #include #include #include #include #include "clang/AST/DeclCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/StmtVisitor.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" namespace clang { namespace dataflow { class StmtToEnvMapImpl : public StmtToEnvMap { public: StmtToEnvMapImpl( const ControlFlowContext &CFCtx, llvm::ArrayRef> BlockToState) : CFCtx(CFCtx), BlockToState(BlockToState) {} const Environment *getEnvironment(const Stmt &S) const override { auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); assert(BlockIt != CFCtx.getStmtToBlock().end()); const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; assert(State); return &State.value().Env; } private: const ControlFlowContext &CFCtx; llvm::ArrayRef> BlockToState; }; /// Returns the index of `Block` in the successors of `Pred`. static int blockIndexInPredecessor(const CFGBlock &Pred, const CFGBlock &Block) { auto BlockPos = llvm::find_if( Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { return Succ && Succ->getBlockID() == Block.getBlockID(); }); return BlockPos - Pred.succ_begin(); } /// Extends the flow condition of an environment based on a terminator /// statement. class TerminatorVisitor : public ConstStmtVisitor { public: TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env, int BlockSuccIdx, TransferOptions TransferOpts) : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx), TransferOpts(TransferOpts) {} void VisitIfStmt(const IfStmt *S) { auto *Cond = S->getCond(); assert(Cond != nullptr); extendFlowCondition(*Cond); } void VisitWhileStmt(const WhileStmt *S) { auto *Cond = S->getCond(); assert(Cond != nullptr); extendFlowCondition(*Cond); } void VisitDoStmt(const DoStmt *S) { auto *Cond = S->getCond(); assert(Cond != nullptr); extendFlowCondition(*Cond); } void VisitForStmt(const ForStmt *S) { auto *Cond = S->getCond(); if (Cond != nullptr) extendFlowCondition(*Cond); } void VisitBinaryOperator(const BinaryOperator *S) { assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); auto *LHS = S->getLHS(); assert(LHS != nullptr); extendFlowCondition(*LHS); } void VisitConditionalOperator(const ConditionalOperator *S) { auto *Cond = S->getCond(); assert(Cond != nullptr); extendFlowCondition(*Cond); } private: void extendFlowCondition(const Expr &Cond) { // The terminator sub-expression might not be evaluated. if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) transfer(StmtToEnv, Cond, Env, TransferOpts); // FIXME: The flow condition must be an r-value, so `SkipPast::None` should // suffice. auto *Val = cast_or_null(Env.getValue(Cond, SkipPast::Reference)); // Value merging depends on flow conditions from different environments // being mutually exclusive -- that is, they cannot both be true in their // entirety (even if they may share some clauses). So, we need *some* value // for the condition expression, even if just an atom. if (Val == nullptr) { // FIXME: Consider introducing a helper for this get-or-create pattern. auto *Loc = Env.getStorageLocation(Cond, SkipPast::None); if (Loc == nullptr) { Loc = &Env.createStorageLocation(Cond); Env.setStorageLocation(Cond, *Loc); } Val = &Env.makeAtomicBoolValue(); Env.setValue(*Loc, *Val); } // The condition must be inverted for the successor that encompasses the // "else" branch, if such exists. if (BlockSuccIdx == 1) Val = &Env.makeNot(*Val); Env.addToFlowCondition(*Val); } const StmtToEnvMap &StmtToEnv; Environment &Env; int BlockSuccIdx; TransferOptions TransferOpts; }; /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// /// Requirements: /// /// All predecessors of `Block` except those with loop back edges must have /// already been transferred. States in `BlockStates` that are set to /// `llvm::None` represent basic blocks that are not evaluated yet. static TypeErasedDataflowAnalysisState computeBlockInputState( const ControlFlowContext &CFCtx, std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis) { llvm::DenseSet Preds; Preds.insert(Block.pred_begin(), Block.pred_end()); if (Block.getTerminator().isTemporaryDtorsBranch()) { // This handles a special case where the code that produced the CFG includes // a conditional operator with a branch that constructs a temporary and // calls a destructor annotated as noreturn. The CFG models this as follows: // // B1 (contains the condition of the conditional operator) - succs: B2, B3 // B2 (contains code that does not call a noreturn destructor) - succs: B4 // B3 (contains code that calls a noreturn destructor) - succs: B4 // B4 (has temporary destructor terminator) - succs: B5, B6 // B5 (noreturn block that is associated with the noreturn destructor call) // B6 (contains code that follows the conditional operator statement) // // The first successor (B5 above) of a basic block with a temporary // destructor terminator (B4 above) is the block that evaluates the // destructor. If that block has a noreturn element then the predecessor // block that constructed the temporary object (B3 above) is effectively a // noreturn block and its state should not be used as input for the state // of the block that has a temporary destructor terminator (B4 above). This // holds regardless of which branch of the ternary operator calls the // noreturn destructor. However, it doesn't cases where a nested ternary // operator includes a branch that contains a noreturn destructor call. // // See `NoreturnDestructorTest` for concrete examples. if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); assert(StmtBlock != CFCtx.getStmtToBlock().end()); Preds.erase(StmtBlock->getSecond()); } } llvm::Optional MaybeState; bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer(); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) continue; // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a // loop back edge to `Block`. const llvm::Optional &MaybePredState = BlockStates[Pred->getBlockID()]; if (!MaybePredState) continue; TypeErasedDataflowAnalysisState PredState = MaybePredState.value(); if (ApplyBuiltinTransfer) { if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates); TerminatorVisitor(StmtToEnv, PredState.Env, blockIndexInPredecessor(*Pred, Block), Analysis.builtinTransferOptions()) .Visit(PredTerminatorStmt); } } if (MaybeState) { Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); MaybeState->Env.join(PredState.Env, Analysis); } else { MaybeState = std::move(PredState); } } if (!MaybeState) { // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` // to enable building analyses like computation of dominators that // initialize the state of each basic block differently. MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv); } return *MaybeState; } /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it /// is evaluated. static void transferCFGStmt( const ControlFlowContext &CFCtx, llvm::ArrayRef> BlockStates, const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, TypeErasedDataflowAnalysisState &State, std::function HandleTransferredStmt) { const Stmt *S = CfgStmt.getStmt(); assert(S != nullptr); if (Analysis.applyBuiltinTransfer()) transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env, Analysis.builtinTransferOptions()); Analysis.transferTypeErased(S, State.Lattice, State.Env); if (HandleTransferredStmt != nullptr) HandleTransferredStmt(CfgStmt, State); } /// Transfers `State` by evaluating `CfgInit`. static void transferCFGInitializer(const CFGInitializer &CfgInit, TypeErasedDataflowAnalysisState &State) { const auto &ThisLoc = *cast( State.Env.getThisPointeeStorageLocation()); const CXXCtorInitializer *Initializer = CfgInit.getInitializer(); assert(Initializer != nullptr); const FieldDecl *Member = Initializer->getMember(); if (Member == nullptr) // Not a field initializer. return; auto *InitStmt = Initializer->getInit(); assert(InitStmt != nullptr); auto *InitStmtLoc = State.Env.getStorageLocation(*InitStmt, SkipPast::Reference); if (InitStmtLoc == nullptr) return; auto *InitStmtVal = State.Env.getValue(*InitStmtLoc); if (InitStmtVal == nullptr) return; if (Member->getType()->isReferenceType()) { auto &MemberLoc = ThisLoc.getChild(*Member); State.Env.setValue(MemberLoc, State.Env.takeOwnership( std::make_unique(*InitStmtLoc))); } else { auto &MemberLoc = ThisLoc.getChild(*Member); State.Env.setValue(MemberLoc, *InitStmtVal); } } TypeErasedDataflowAnalysisState transferBlock( const ControlFlowContext &CFCtx, std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis, std::function HandleTransferredStmt) { TypeErasedDataflowAnalysisState State = computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); for (const CFGElement &Element : Block) { switch (Element.getKind()) { case CFGElement::Statement: transferCFGStmt(CFCtx, BlockStates, *Element.getAs(), Analysis, State, HandleTransferredStmt); break; case CFGElement::Initializer: if (Analysis.applyBuiltinTransfer()) transferCFGInitializer(*Element.getAs(), State); break; default: // FIXME: Evaluate other kinds of `CFGElement`. break; } } return State; } llvm::Expected>> runTypeErasedDataflowAnalysis( const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, std::function PostVisitStmt) { PostOrderCFGView POV(&CFCtx.getCFG()); ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); std::vector> BlockStates; BlockStates.resize(CFCtx.getCFG().size(), llvm::None); // The entry basic block doesn't contain statements so it can be skipped. const CFGBlock &Entry = CFCtx.getCFG().getEntry(); BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), InitEnv}; Worklist.enqueueSuccessors(&Entry); // Bugs in lattices and transfer functions can prevent the analysis from // converging. To limit the damage (infinite loops) that these bugs can cause, // limit the number of iterations. // FIXME: Consider making the maximum number of iterations configurable. // FIXME: Consider restricting the number of backedges followed, rather than // iterations. // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number // of iterations, number of functions that time out, etc. static constexpr uint32_t MaxAverageVisitsPerBlock = 4; static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; const uint32_t RelativeMaxIterations = MaxAverageVisitsPerBlock * BlockStates.size(); const uint32_t MaxIterations = std::min(RelativeMaxIterations, AbsoluteMaxIterations); uint32_t Iterations = 0; while (const CFGBlock *Block = Worklist.dequeue()) { if (++Iterations > MaxIterations) { return llvm::createStringError(std::errc::timed_out, "maximum number of iterations reached"); } const llvm::Optional &OldBlockState = BlockStates[Block->getBlockID()]; TypeErasedDataflowAnalysisState NewBlockState = transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); if (OldBlockState && Analysis.isEqualTypeErased(OldBlockState.value().Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need to // revisit its successors. continue; } BlockStates[Block->getBlockID()] = std::move(NewBlockState); // Do not add unreachable successor blocks to `Worklist`. if (Block->hasNoReturnElement()) continue; Worklist.enqueueSuccessors(Block); } // FIXME: Consider evaluating unreachable basic blocks (those that have a // state set to `llvm::None` at this point) to also analyze dead code. if (PostVisitStmt) { for (const CFGBlock *Block : CFCtx.getCFG()) { // Skip blocks that were not evaluated. if (!BlockStates[Block->getBlockID()]) continue; transferBlock( CFCtx, BlockStates, *Block, InitEnv, Analysis, [&PostVisitStmt](const clang::CFGStmt &Stmt, const TypeErasedDataflowAnalysisState &State) { PostVisitStmt(Stmt.getStmt(), State); }); } } return BlockStates; } } // namespace dataflow } // namespace clang