1 //=- ReachableCodePathInsensitive.cpp ---------------------------*- C++ --*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a flow-sensitive, path-insensitive analysis of
11 // determining reachable blocks within a CFG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/Analyses/ReachableCode.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/ExprObjC.h"
20 #include "clang/AST/StmtCXX.h"
21 #include "clang/Analysis/AnalysisContext.h"
22 #include "clang/Analysis/CFG.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/SmallVector.h"
26 
27 using namespace clang;
28 
29 //===----------------------------------------------------------------------===//
30 // Core Reachability Analysis routines.
31 //===----------------------------------------------------------------------===//
32 
33 static bool isEnumConstant(const Expr *Ex) {
34   const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex);
35   if (!DR)
36     return false;
37   return isa<EnumConstantDecl>(DR->getDecl());
38 }
39 
40 static const Expr *stripStdStringCtor(const Expr *Ex) {
41   // Go crazy pattern matching an implicit construction of std::string("").
42   const ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(Ex);
43   if (!EWC)
44     return 0;
45   const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(EWC->getSubExpr());
46   if (!CCE)
47     return 0;
48   QualType Ty = CCE->getType();
49   if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(Ty))
50     Ty = ET->getNamedType();
51   const TypedefType *TT = dyn_cast<TypedefType>(Ty);
52   StringRef Name = TT->getDecl()->getName();
53   if (Name != "string")
54     return 0;
55   if (CCE->getNumArgs() != 1)
56     return 0;
57   const MaterializeTemporaryExpr *MTE =
58     dyn_cast<MaterializeTemporaryExpr>(CCE->getArg(0));
59   if (!MTE)
60     return 0;
61   CXXBindTemporaryExpr *CBT =
62     dyn_cast<CXXBindTemporaryExpr>(MTE->GetTemporaryExpr()->IgnoreParenCasts());
63   if (!CBT)
64     return 0;
65   Ex = CBT->getSubExpr()->IgnoreParenCasts();
66   CCE = dyn_cast<CXXConstructExpr>(Ex);
67   if (!CCE)
68     return 0;
69   if (CCE->getNumArgs() != 1)
70     return 0;
71   return dyn_cast<StringLiteral>(CCE->getArg(0)->IgnoreParenCasts());
72 }
73 
74 /// Strip away "sugar" around trivial expressions that are for the
75 /// purpose of this analysis considered uninteresting for dead code warnings.
76 static const Expr *stripExprSugar(const Expr *Ex) {
77   Ex = Ex->IgnoreParenCasts();
78   // If 'Ex' is a constructor for a std::string, strip that
79   // away.  We can only get here if the trivial expression was
80   // something like a C string literal, with the std::string
81   // just wrapping that value.
82   if (const Expr *StdStringVal = stripStdStringCtor(Ex))
83     return StdStringVal;
84   return Ex;
85 }
86 
87 static bool isTrivialExpression(const Expr *Ex) {
88   Ex = Ex->IgnoreParenCasts();
89   return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) ||
90          isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) ||
91          isa<CharacterLiteral>(Ex) ||
92          isEnumConstant(Ex);
93 }
94 
95 static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) {
96   // Check if the block ends with a do...while() and see if 'S' is the
97   // condition.
98   if (const Stmt *Term = B->getTerminator()) {
99     if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) {
100       const Expr *Cond = DS->getCond();
101       return Cond == S && isTrivialExpression(Cond);
102     }
103   }
104   return false;
105 }
106 
107 static bool isTrivialReturn(const CFGBlock *B, const Stmt *S) {
108   // Look to see if the block ends with a 'return', and see if 'S'
109   // is a substatement.  The 'return' may not be the last element in
110   // the block because of destructors.
111   for (CFGBlock::const_reverse_iterator I = B->rbegin(), E = B->rend();
112        I != E; ++I) {
113     if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
114       if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) {
115         // Determine if we need to lock at the body of the block
116         // before the dead return.
117         if (RS == S)
118           return true;
119         if (const Expr *RE = RS->getRetValue()) {
120           RE = stripExprSugar(RE->IgnoreParenCasts());
121           return RE == S && isTrivialExpression(RE);
122         }
123       }
124       break;
125     }
126   }
127   return false;
128 }
129 
130 static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) {
131   assert(Loc.isMacroID());
132   SourceLocation Last;
133   while (Loc.isMacroID()) {
134     Last = Loc;
135     Loc = SM.getImmediateMacroCallerLoc(Loc);
136   }
137   return Last;
138 }
139 
140 /// Returns true if the statement is expanded from a configuration macro.
141 static bool isExpandedFromConfigurationMacro(const Stmt *S,
142                                              Preprocessor &PP,
143                                              bool IgnoreYES_NO = false) {
144   // FIXME: This is not very precise.  Here we just check to see if the
145   // value comes from a macro, but we can do much better.  This is likely
146   // to be over conservative.  This logic is factored into a separate function
147   // so that we can refine it later.
148   SourceLocation L = S->getLocStart();
149   if (L.isMacroID()) {
150     if (IgnoreYES_NO) {
151       // The Objective-C constant 'YES' and 'NO'
152       // are defined as macros.  Do not treat them
153       // as configuration values.
154       SourceManager &SM = PP.getSourceManager();
155       SourceLocation TopL = getTopMostMacro(L, SM);
156       StringRef MacroName = PP.getImmediateMacroName(TopL);
157       if (MacroName == "YES" || MacroName == "NO")
158         return false;
159     }
160     return true;
161   }
162   return false;
163 }
164 
165 /// Returns true if the statement represents a configuration value.
166 ///
167 /// A configuration value is something usually determined at compile-time
168 /// to conditionally always execute some branch.  Such guards are for
169 /// "sometimes unreachable" code.  Such code is usually not interesting
170 /// to report as unreachable, and may mask truly unreachable code within
171 /// those blocks.
172 static bool isConfigurationValue(const Stmt *S,
173                                  Preprocessor &PP,
174                                  bool IncludeIntegers = true) {
175   if (!S)
176     return false;
177 
178   if (const Expr *Ex = dyn_cast<Expr>(S))
179     S = Ex->IgnoreParenCasts();
180 
181   switch (S->getStmtClass()) {
182     case Stmt::DeclRefExprClass: {
183       const DeclRefExpr *DR = cast<DeclRefExpr>(S);
184       const ValueDecl *D = DR->getDecl();
185       if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D))
186         return isConfigurationValue(ED->getInitExpr(), PP);
187       if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
188         // As a heuristic, treat globals as configuration values.  Note
189         // that we only will get here if Sema evaluated this
190         // condition to a constant expression, which means the global
191         // had to be declared in a way to be a truly constant value.
192         // We could generalize this to local variables, but it isn't
193         // clear if those truly represent configuration values that
194         // gate unreachable code.
195         if (!VD->hasLocalStorage())
196           return true;
197 
198         // As a heuristic, locals that have been marked 'const' explicitly
199         // can be treated as configuration values as well.
200         return VD->getType().isLocalConstQualified();
201       }
202       return false;
203     }
204     case Stmt::IntegerLiteralClass:
205       return IncludeIntegers ? isExpandedFromConfigurationMacro(S, PP)
206                              : false;
207     case Stmt::ObjCBoolLiteralExprClass:
208       return isExpandedFromConfigurationMacro(S, PP, /* IgnoreYES_NO */ true);
209 
210     case Stmt::UnaryExprOrTypeTraitExprClass:
211       return true;
212     case Stmt::BinaryOperatorClass: {
213       const BinaryOperator *B = cast<BinaryOperator>(S);
214       // Only include raw integers (not enums) as configuration
215       // values if they are used in a logical or comparison operator
216       // (not arithmetic).
217       IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp());
218       return isConfigurationValue(B->getLHS(), PP, IncludeIntegers) ||
219              isConfigurationValue(B->getRHS(), PP, IncludeIntegers);
220     }
221     case Stmt::UnaryOperatorClass: {
222       const UnaryOperator *UO = cast<UnaryOperator>(S);
223       return UO->getOpcode() == UO_LNot &&
224              isConfigurationValue(UO->getSubExpr(), PP);
225     }
226     default:
227       return false;
228   }
229 }
230 
231 /// Returns true if we should always explore all successors of a block.
232 static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B,
233                                              Preprocessor &PP) {
234   if (const Stmt *Term = B->getTerminator()) {
235     if (isa<SwitchStmt>(Term))
236       return true;
237     // Specially handle '||' and '&&'.
238     if (isa<BinaryOperator>(Term))
239       return isConfigurationValue(Term, PP);
240   }
241 
242   return isConfigurationValue(B->getTerminatorCondition(), PP);
243 }
244 
245 static unsigned scanFromBlock(const CFGBlock *Start,
246                               llvm::BitVector &Reachable,
247                               Preprocessor *PP,
248                               bool IncludeSometimesUnreachableEdges) {
249   unsigned count = 0;
250 
251   // Prep work queue
252   SmallVector<const CFGBlock*, 32> WL;
253 
254   // The entry block may have already been marked reachable
255   // by the caller.
256   if (!Reachable[Start->getBlockID()]) {
257     ++count;
258     Reachable[Start->getBlockID()] = true;
259   }
260 
261   WL.push_back(Start);
262 
263   // Find the reachable blocks from 'Start'.
264   while (!WL.empty()) {
265     const CFGBlock *item = WL.pop_back_val();
266 
267     // There are cases where we want to treat all successors as reachable.
268     // The idea is that some "sometimes unreachable" code is not interesting,
269     // and that we should forge ahead and explore those branches anyway.
270     // This allows us to potentially uncover some "always unreachable" code
271     // within the "sometimes unreachable" code.
272     // Look at the successors and mark then reachable.
273     Optional<bool> TreatAllSuccessorsAsReachable;
274     if (!IncludeSometimesUnreachableEdges)
275       TreatAllSuccessorsAsReachable = false;
276 
277     for (CFGBlock::const_succ_iterator I = item->succ_begin(),
278          E = item->succ_end(); I != E; ++I) {
279       const CFGBlock *B = *I;
280       if (!B) do {
281         const CFGBlock *UB = I->getPossiblyUnreachableBlock();
282         if (!UB)
283           break;
284 
285         if (!TreatAllSuccessorsAsReachable.hasValue()) {
286           assert(PP);
287           TreatAllSuccessorsAsReachable =
288             shouldTreatSuccessorsAsReachable(item, *PP);
289         }
290 
291         if (TreatAllSuccessorsAsReachable.getValue()) {
292           B = UB;
293           break;
294         }
295       }
296       while (false);
297 
298       if (B) {
299         unsigned blockID = B->getBlockID();
300         if (!Reachable[blockID]) {
301           Reachable.set(blockID);
302           WL.push_back(B);
303           ++count;
304         }
305       }
306     }
307   }
308   return count;
309 }
310 
311 static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start,
312                                             Preprocessor &PP,
313                                             llvm::BitVector &Reachable) {
314   return scanFromBlock(Start, Reachable, &PP, true);
315 }
316 
317 //===----------------------------------------------------------------------===//
318 // Dead Code Scanner.
319 //===----------------------------------------------------------------------===//
320 
321 namespace {
322   class DeadCodeScan {
323     llvm::BitVector Visited;
324     llvm::BitVector &Reachable;
325     SmallVector<const CFGBlock *, 10> WorkList;
326     Preprocessor &PP;
327 
328     typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
329     DeferredLocsTy;
330 
331     DeferredLocsTy DeferredLocs;
332 
333   public:
334     DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP)
335     : Visited(reachable.size()),
336       Reachable(reachable),
337       PP(PP) {}
338 
339     void enqueue(const CFGBlock *block);
340     unsigned scanBackwards(const CFGBlock *Start,
341     clang::reachable_code::Callback &CB);
342 
343     bool isDeadCodeRoot(const CFGBlock *Block);
344 
345     const Stmt *findDeadCode(const CFGBlock *Block);
346 
347     void reportDeadCode(const CFGBlock *B,
348     const Stmt *S,
349     clang::reachable_code::Callback &CB);
350     };
351 }
352 
353 void DeadCodeScan::enqueue(const CFGBlock *block) {
354   unsigned blockID = block->getBlockID();
355   if (Reachable[blockID] || Visited[blockID])
356     return;
357   Visited[blockID] = true;
358   WorkList.push_back(block);
359 }
360 
361 bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) {
362   bool isDeadRoot = true;
363 
364   for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
365        E = Block->pred_end(); I != E; ++I) {
366     if (const CFGBlock *PredBlock = *I) {
367       unsigned blockID = PredBlock->getBlockID();
368       if (Visited[blockID]) {
369         isDeadRoot = false;
370         continue;
371       }
372       if (!Reachable[blockID]) {
373         isDeadRoot = false;
374         Visited[blockID] = true;
375         WorkList.push_back(PredBlock);
376         continue;
377       }
378     }
379   }
380 
381   return isDeadRoot;
382 }
383 
384 static bool isValidDeadStmt(const Stmt *S) {
385   if (S->getLocStart().isInvalid())
386     return false;
387   if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
388     return BO->getOpcode() != BO_Comma;
389   return true;
390 }
391 
392 const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
393   for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
394     if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
395       const Stmt *S = CS->getStmt();
396       if (isValidDeadStmt(S))
397         return S;
398     }
399 
400   if (CFGTerminator T = Block->getTerminator()) {
401     if (!T.isTemporaryDtorsBranch()) {
402       const Stmt *S = T.getStmt();
403       if (isValidDeadStmt(S))
404         return S;
405     }
406   }
407 
408   return 0;
409 }
410 
411 static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1,
412                   const std::pair<const CFGBlock *, const Stmt *> *p2) {
413   if (p1->second->getLocStart() < p2->second->getLocStart())
414     return -1;
415   if (p2->second->getLocStart() < p1->second->getLocStart())
416     return 1;
417   return 0;
418 }
419 
420 unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
421                                      clang::reachable_code::Callback &CB) {
422 
423   unsigned count = 0;
424   enqueue(Start);
425 
426   while (!WorkList.empty()) {
427     const CFGBlock *Block = WorkList.pop_back_val();
428 
429     // It is possible that this block has been marked reachable after
430     // it was enqueued.
431     if (Reachable[Block->getBlockID()])
432       continue;
433 
434     // Look for any dead code within the block.
435     const Stmt *S = findDeadCode(Block);
436 
437     if (!S) {
438       // No dead code.  Possibly an empty block.  Look at dead predecessors.
439       for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
440            E = Block->pred_end(); I != E; ++I) {
441         if (const CFGBlock *predBlock = *I)
442           enqueue(predBlock);
443       }
444       continue;
445     }
446 
447     // Specially handle macro-expanded code.
448     if (S->getLocStart().isMacroID()) {
449       count += scanMaybeReachableFromBlock(Block, PP, Reachable);
450       continue;
451     }
452 
453     if (isDeadCodeRoot(Block)) {
454       reportDeadCode(Block, S, CB);
455       count += scanMaybeReachableFromBlock(Block, PP, Reachable);
456     }
457     else {
458       // Record this statement as the possibly best location in a
459       // strongly-connected component of dead code for emitting a
460       // warning.
461       DeferredLocs.push_back(std::make_pair(Block, S));
462     }
463   }
464 
465   // If we didn't find a dead root, then report the dead code with the
466   // earliest location.
467   if (!DeferredLocs.empty()) {
468     llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp);
469     for (DeferredLocsTy::iterator I = DeferredLocs.begin(),
470          E = DeferredLocs.end(); I != E; ++I) {
471       const CFGBlock *Block = I->first;
472       if (Reachable[Block->getBlockID()])
473         continue;
474       reportDeadCode(Block, I->second, CB);
475       count += scanMaybeReachableFromBlock(Block, PP, Reachable);
476     }
477   }
478 
479   return count;
480 }
481 
482 static SourceLocation GetUnreachableLoc(const Stmt *S,
483                                         SourceRange &R1,
484                                         SourceRange &R2) {
485   R1 = R2 = SourceRange();
486 
487   if (const Expr *Ex = dyn_cast<Expr>(S))
488     S = Ex->IgnoreParenImpCasts();
489 
490   switch (S->getStmtClass()) {
491     case Expr::BinaryOperatorClass: {
492       const BinaryOperator *BO = cast<BinaryOperator>(S);
493       return BO->getOperatorLoc();
494     }
495     case Expr::UnaryOperatorClass: {
496       const UnaryOperator *UO = cast<UnaryOperator>(S);
497       R1 = UO->getSubExpr()->getSourceRange();
498       return UO->getOperatorLoc();
499     }
500     case Expr::CompoundAssignOperatorClass: {
501       const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S);
502       R1 = CAO->getLHS()->getSourceRange();
503       R2 = CAO->getRHS()->getSourceRange();
504       return CAO->getOperatorLoc();
505     }
506     case Expr::BinaryConditionalOperatorClass:
507     case Expr::ConditionalOperatorClass: {
508       const AbstractConditionalOperator *CO =
509       cast<AbstractConditionalOperator>(S);
510       return CO->getQuestionLoc();
511     }
512     case Expr::MemberExprClass: {
513       const MemberExpr *ME = cast<MemberExpr>(S);
514       R1 = ME->getSourceRange();
515       return ME->getMemberLoc();
516     }
517     case Expr::ArraySubscriptExprClass: {
518       const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S);
519       R1 = ASE->getLHS()->getSourceRange();
520       R2 = ASE->getRHS()->getSourceRange();
521       return ASE->getRBracketLoc();
522     }
523     case Expr::CStyleCastExprClass: {
524       const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S);
525       R1 = CSC->getSubExpr()->getSourceRange();
526       return CSC->getLParenLoc();
527     }
528     case Expr::CXXFunctionalCastExprClass: {
529       const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S);
530       R1 = CE->getSubExpr()->getSourceRange();
531       return CE->getLocStart();
532     }
533     case Stmt::CXXTryStmtClass: {
534       return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc();
535     }
536     case Expr::ObjCBridgedCastExprClass: {
537       const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S);
538       R1 = CSC->getSubExpr()->getSourceRange();
539       return CSC->getLParenLoc();
540     }
541     default: ;
542   }
543   R1 = S->getSourceRange();
544   return S->getLocStart();
545 }
546 
547 void DeadCodeScan::reportDeadCode(const CFGBlock *B,
548                                   const Stmt *S,
549                                   clang::reachable_code::Callback &CB) {
550   // The kind of unreachable code found.
551   reachable_code::UnreachableKind UK = reachable_code::UK_Other;
552 
553   do {
554     // Suppress idiomatic cases of calling a noreturn function just
555     // before executing a 'break'.  If there is other code after the 'break'
556     // in the block then don't suppress the warning.
557     if (isa<BreakStmt>(S)) {
558       UK = reachable_code::UK_Break;
559       break;
560     }
561 
562     if (isTrivialDoWhile(B, S))
563       return;
564 
565     // Suppress trivial 'return' statements that are dead.
566     if (isTrivialReturn(B, S)) {
567       UK = reachable_code::UK_TrivialReturn;
568       break;
569     }
570 
571   } while(false);
572 
573   SourceRange R1, R2;
574   SourceLocation Loc = GetUnreachableLoc(S, R1, R2);
575   CB.HandleUnreachable(UK, Loc, R1, R2);
576 }
577 
578 //===----------------------------------------------------------------------===//
579 // Reachability APIs.
580 //===----------------------------------------------------------------------===//
581 
582 namespace clang { namespace reachable_code {
583 
584 void Callback::anchor() { }
585 
586 unsigned ScanReachableFromBlock(const CFGBlock *Start,
587                                 llvm::BitVector &Reachable) {
588   return scanFromBlock(Start, Reachable, /* SourceManager* */ 0, false);
589 }
590 
591 void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
592                          Callback &CB) {
593 
594   CFG *cfg = AC.getCFG();
595   if (!cfg)
596     return;
597 
598   // Scan for reachable blocks from the entrance of the CFG.
599   // If there are no unreachable blocks, we're done.
600   llvm::BitVector reachable(cfg->getNumBlockIDs());
601   unsigned numReachable =
602     scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable);
603   if (numReachable == cfg->getNumBlockIDs())
604     return;
605 
606   // If there aren't explicit EH edges, we should include the 'try' dispatch
607   // blocks as roots.
608   if (!AC.getCFGBuildOptions().AddEHEdges) {
609     for (CFG::try_block_iterator I = cfg->try_blocks_begin(),
610          E = cfg->try_blocks_end() ; I != E; ++I) {
611       numReachable += scanMaybeReachableFromBlock(*I, PP, reachable);
612     }
613     if (numReachable == cfg->getNumBlockIDs())
614       return;
615   }
616 
617   // There are some unreachable blocks.  We need to find the root blocks that
618   // contain code that should be considered unreachable.
619   for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
620     const CFGBlock *block = *I;
621     // A block may have been marked reachable during this loop.
622     if (reachable[block->getBlockID()])
623       continue;
624 
625     DeadCodeScan DS(reachable, PP);
626     numReachable += DS.scanBackwards(block, CB);
627 
628     if (numReachable == cfg->getNumBlockIDs())
629       return;
630   }
631 }
632 
633 }} // end namespace clang::reachable_code
634