1 //=- ReachableCodePathInsensitive.cpp ---------------------------*- C++ --*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a flow-sensitive, path-insensitive analysis of 11 // determining reachable blocks within a CFG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/Analyses/ReachableCode.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "clang/AST/Expr.h" 18 #include "clang/AST/ExprCXX.h" 19 #include "clang/AST/ExprObjC.h" 20 #include "clang/AST/StmtCXX.h" 21 #include "clang/Analysis/AnalysisContext.h" 22 #include "clang/Analysis/CFG.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "llvm/ADT/BitVector.h" 25 #include "llvm/ADT/SmallVector.h" 26 27 using namespace clang; 28 29 //===----------------------------------------------------------------------===// 30 // Core Reachability Analysis routines. 31 //===----------------------------------------------------------------------===// 32 33 static bool isEnumConstant(const Expr *Ex) { 34 const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex); 35 if (!DR) 36 return false; 37 return isa<EnumConstantDecl>(DR->getDecl()); 38 } 39 40 static const Expr *stripStdStringCtor(const Expr *Ex) { 41 // Go crazy pattern matching an implicit construction of std::string(""). 42 const ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(Ex); 43 if (!EWC) 44 return 0; 45 const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(EWC->getSubExpr()); 46 if (!CCE) 47 return 0; 48 QualType Ty = CCE->getType(); 49 if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(Ty)) 50 Ty = ET->getNamedType(); 51 const TypedefType *TT = dyn_cast<TypedefType>(Ty); 52 StringRef Name = TT->getDecl()->getName(); 53 if (Name != "string") 54 return 0; 55 if (CCE->getNumArgs() != 1) 56 return 0; 57 const MaterializeTemporaryExpr *MTE = 58 dyn_cast<MaterializeTemporaryExpr>(CCE->getArg(0)); 59 if (!MTE) 60 return 0; 61 CXXBindTemporaryExpr *CBT = 62 dyn_cast<CXXBindTemporaryExpr>(MTE->GetTemporaryExpr()->IgnoreParenCasts()); 63 if (!CBT) 64 return 0; 65 Ex = CBT->getSubExpr()->IgnoreParenCasts(); 66 CCE = dyn_cast<CXXConstructExpr>(Ex); 67 if (!CCE) 68 return 0; 69 if (CCE->getNumArgs() != 1) 70 return 0; 71 return dyn_cast<StringLiteral>(CCE->getArg(0)->IgnoreParenCasts()); 72 } 73 74 /// Strip away "sugar" around trivial expressions that are for the 75 /// purpose of this analysis considered uninteresting for dead code warnings. 76 static const Expr *stripExprSugar(const Expr *Ex) { 77 Ex = Ex->IgnoreParenCasts(); 78 // If 'Ex' is a constructor for a std::string, strip that 79 // away. We can only get here if the trivial expression was 80 // something like a C string literal, with the std::string 81 // just wrapping that value. 82 if (const Expr *StdStringVal = stripStdStringCtor(Ex)) 83 return StdStringVal; 84 return Ex; 85 } 86 87 static bool isTrivialExpression(const Expr *Ex) { 88 Ex = Ex->IgnoreParenCasts(); 89 return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) || 90 isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) || 91 isa<CharacterLiteral>(Ex) || 92 isEnumConstant(Ex); 93 } 94 95 static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) { 96 // Check if the block ends with a do...while() and see if 'S' is the 97 // condition. 98 if (const Stmt *Term = B->getTerminator()) { 99 if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) { 100 const Expr *Cond = DS->getCond(); 101 return Cond == S && isTrivialExpression(Cond); 102 } 103 } 104 return false; 105 } 106 107 static bool isTrivialReturn(const CFGBlock *B, const Stmt *S) { 108 // Look to see if the block ends with a 'return', and see if 'S' 109 // is a substatement. The 'return' may not be the last element in 110 // the block because of destructors. 111 for (CFGBlock::const_reverse_iterator I = B->rbegin(), E = B->rend(); 112 I != E; ++I) { 113 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) { 114 if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) { 115 // Determine if we need to lock at the body of the block 116 // before the dead return. 117 if (RS == S) 118 return true; 119 if (const Expr *RE = RS->getRetValue()) { 120 RE = stripExprSugar(RE->IgnoreParenCasts()); 121 return RE == S && isTrivialExpression(RE); 122 } 123 } 124 break; 125 } 126 } 127 return false; 128 } 129 130 static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) { 131 assert(Loc.isMacroID()); 132 SourceLocation Last; 133 while (Loc.isMacroID()) { 134 Last = Loc; 135 Loc = SM.getImmediateMacroCallerLoc(Loc); 136 } 137 return Last; 138 } 139 140 /// Returns true if the statement is expanded from a configuration macro. 141 static bool isExpandedFromConfigurationMacro(const Stmt *S, 142 Preprocessor &PP, 143 bool IgnoreYES_NO = false) { 144 // FIXME: This is not very precise. Here we just check to see if the 145 // value comes from a macro, but we can do much better. This is likely 146 // to be over conservative. This logic is factored into a separate function 147 // so that we can refine it later. 148 SourceLocation L = S->getLocStart(); 149 if (L.isMacroID()) { 150 if (IgnoreYES_NO) { 151 // The Objective-C constant 'YES' and 'NO' 152 // are defined as macros. Do not treat them 153 // as configuration values. 154 SourceManager &SM = PP.getSourceManager(); 155 SourceLocation TopL = getTopMostMacro(L, SM); 156 StringRef MacroName = PP.getImmediateMacroName(TopL); 157 if (MacroName == "YES" || MacroName == "NO") 158 return false; 159 } 160 return true; 161 } 162 return false; 163 } 164 165 /// Returns true if the statement represents a configuration value. 166 /// 167 /// A configuration value is something usually determined at compile-time 168 /// to conditionally always execute some branch. Such guards are for 169 /// "sometimes unreachable" code. Such code is usually not interesting 170 /// to report as unreachable, and may mask truly unreachable code within 171 /// those blocks. 172 static bool isConfigurationValue(const Stmt *S, 173 Preprocessor &PP, 174 bool IncludeIntegers = true) { 175 if (!S) 176 return false; 177 178 if (const Expr *Ex = dyn_cast<Expr>(S)) 179 S = Ex->IgnoreParenCasts(); 180 181 switch (S->getStmtClass()) { 182 case Stmt::DeclRefExprClass: { 183 const DeclRefExpr *DR = cast<DeclRefExpr>(S); 184 const ValueDecl *D = DR->getDecl(); 185 if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) 186 return isConfigurationValue(ED->getInitExpr(), PP); 187 if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { 188 // As a heuristic, treat globals as configuration values. Note 189 // that we only will get here if Sema evaluated this 190 // condition to a constant expression, which means the global 191 // had to be declared in a way to be a truly constant value. 192 // We could generalize this to local variables, but it isn't 193 // clear if those truly represent configuration values that 194 // gate unreachable code. 195 if (!VD->hasLocalStorage()) 196 return true; 197 198 // As a heuristic, locals that have been marked 'const' explicitly 199 // can be treated as configuration values as well. 200 return VD->getType().isLocalConstQualified(); 201 } 202 return false; 203 } 204 case Stmt::IntegerLiteralClass: 205 return IncludeIntegers ? isExpandedFromConfigurationMacro(S, PP) 206 : false; 207 case Stmt::ObjCBoolLiteralExprClass: 208 return isExpandedFromConfigurationMacro(S, PP, /* IgnoreYES_NO */ true); 209 210 case Stmt::UnaryExprOrTypeTraitExprClass: 211 return true; 212 case Stmt::BinaryOperatorClass: { 213 const BinaryOperator *B = cast<BinaryOperator>(S); 214 // Only include raw integers (not enums) as configuration 215 // values if they are used in a logical or comparison operator 216 // (not arithmetic). 217 IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp()); 218 return isConfigurationValue(B->getLHS(), PP, IncludeIntegers) || 219 isConfigurationValue(B->getRHS(), PP, IncludeIntegers); 220 } 221 case Stmt::UnaryOperatorClass: { 222 const UnaryOperator *UO = cast<UnaryOperator>(S); 223 return UO->getOpcode() == UO_LNot && 224 isConfigurationValue(UO->getSubExpr(), PP); 225 } 226 default: 227 return false; 228 } 229 } 230 231 /// Returns true if we should always explore all successors of a block. 232 static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B, 233 Preprocessor &PP) { 234 if (const Stmt *Term = B->getTerminator()) { 235 if (isa<SwitchStmt>(Term)) 236 return true; 237 // Specially handle '||' and '&&'. 238 if (isa<BinaryOperator>(Term)) 239 return isConfigurationValue(Term, PP); 240 } 241 242 return isConfigurationValue(B->getTerminatorCondition(), PP); 243 } 244 245 static unsigned scanFromBlock(const CFGBlock *Start, 246 llvm::BitVector &Reachable, 247 Preprocessor *PP, 248 bool IncludeSometimesUnreachableEdges) { 249 unsigned count = 0; 250 251 // Prep work queue 252 SmallVector<const CFGBlock*, 32> WL; 253 254 // The entry block may have already been marked reachable 255 // by the caller. 256 if (!Reachable[Start->getBlockID()]) { 257 ++count; 258 Reachable[Start->getBlockID()] = true; 259 } 260 261 WL.push_back(Start); 262 263 // Find the reachable blocks from 'Start'. 264 while (!WL.empty()) { 265 const CFGBlock *item = WL.pop_back_val(); 266 267 // There are cases where we want to treat all successors as reachable. 268 // The idea is that some "sometimes unreachable" code is not interesting, 269 // and that we should forge ahead and explore those branches anyway. 270 // This allows us to potentially uncover some "always unreachable" code 271 // within the "sometimes unreachable" code. 272 // Look at the successors and mark then reachable. 273 Optional<bool> TreatAllSuccessorsAsReachable; 274 if (!IncludeSometimesUnreachableEdges) 275 TreatAllSuccessorsAsReachable = false; 276 277 for (CFGBlock::const_succ_iterator I = item->succ_begin(), 278 E = item->succ_end(); I != E; ++I) { 279 const CFGBlock *B = *I; 280 if (!B) do { 281 const CFGBlock *UB = I->getPossiblyUnreachableBlock(); 282 if (!UB) 283 break; 284 285 if (!TreatAllSuccessorsAsReachable.hasValue()) { 286 assert(PP); 287 TreatAllSuccessorsAsReachable = 288 shouldTreatSuccessorsAsReachable(item, *PP); 289 } 290 291 if (TreatAllSuccessorsAsReachable.getValue()) { 292 B = UB; 293 break; 294 } 295 } 296 while (false); 297 298 if (B) { 299 unsigned blockID = B->getBlockID(); 300 if (!Reachable[blockID]) { 301 Reachable.set(blockID); 302 WL.push_back(B); 303 ++count; 304 } 305 } 306 } 307 } 308 return count; 309 } 310 311 static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start, 312 Preprocessor &PP, 313 llvm::BitVector &Reachable) { 314 return scanFromBlock(Start, Reachable, &PP, true); 315 } 316 317 //===----------------------------------------------------------------------===// 318 // Dead Code Scanner. 319 //===----------------------------------------------------------------------===// 320 321 namespace { 322 class DeadCodeScan { 323 llvm::BitVector Visited; 324 llvm::BitVector &Reachable; 325 SmallVector<const CFGBlock *, 10> WorkList; 326 Preprocessor &PP; 327 328 typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12> 329 DeferredLocsTy; 330 331 DeferredLocsTy DeferredLocs; 332 333 public: 334 DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP) 335 : Visited(reachable.size()), 336 Reachable(reachable), 337 PP(PP) {} 338 339 void enqueue(const CFGBlock *block); 340 unsigned scanBackwards(const CFGBlock *Start, 341 clang::reachable_code::Callback &CB); 342 343 bool isDeadCodeRoot(const CFGBlock *Block); 344 345 const Stmt *findDeadCode(const CFGBlock *Block); 346 347 void reportDeadCode(const CFGBlock *B, 348 const Stmt *S, 349 clang::reachable_code::Callback &CB); 350 }; 351 } 352 353 void DeadCodeScan::enqueue(const CFGBlock *block) { 354 unsigned blockID = block->getBlockID(); 355 if (Reachable[blockID] || Visited[blockID]) 356 return; 357 Visited[blockID] = true; 358 WorkList.push_back(block); 359 } 360 361 bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) { 362 bool isDeadRoot = true; 363 364 for (CFGBlock::const_pred_iterator I = Block->pred_begin(), 365 E = Block->pred_end(); I != E; ++I) { 366 if (const CFGBlock *PredBlock = *I) { 367 unsigned blockID = PredBlock->getBlockID(); 368 if (Visited[blockID]) { 369 isDeadRoot = false; 370 continue; 371 } 372 if (!Reachable[blockID]) { 373 isDeadRoot = false; 374 Visited[blockID] = true; 375 WorkList.push_back(PredBlock); 376 continue; 377 } 378 } 379 } 380 381 return isDeadRoot; 382 } 383 384 static bool isValidDeadStmt(const Stmt *S) { 385 if (S->getLocStart().isInvalid()) 386 return false; 387 if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) 388 return BO->getOpcode() != BO_Comma; 389 return true; 390 } 391 392 const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) { 393 for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I) 394 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) { 395 const Stmt *S = CS->getStmt(); 396 if (isValidDeadStmt(S)) 397 return S; 398 } 399 400 if (CFGTerminator T = Block->getTerminator()) { 401 if (!T.isTemporaryDtorsBranch()) { 402 const Stmt *S = T.getStmt(); 403 if (isValidDeadStmt(S)) 404 return S; 405 } 406 } 407 408 return 0; 409 } 410 411 static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1, 412 const std::pair<const CFGBlock *, const Stmt *> *p2) { 413 if (p1->second->getLocStart() < p2->second->getLocStart()) 414 return -1; 415 if (p2->second->getLocStart() < p1->second->getLocStart()) 416 return 1; 417 return 0; 418 } 419 420 unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start, 421 clang::reachable_code::Callback &CB) { 422 423 unsigned count = 0; 424 enqueue(Start); 425 426 while (!WorkList.empty()) { 427 const CFGBlock *Block = WorkList.pop_back_val(); 428 429 // It is possible that this block has been marked reachable after 430 // it was enqueued. 431 if (Reachable[Block->getBlockID()]) 432 continue; 433 434 // Look for any dead code within the block. 435 const Stmt *S = findDeadCode(Block); 436 437 if (!S) { 438 // No dead code. Possibly an empty block. Look at dead predecessors. 439 for (CFGBlock::const_pred_iterator I = Block->pred_begin(), 440 E = Block->pred_end(); I != E; ++I) { 441 if (const CFGBlock *predBlock = *I) 442 enqueue(predBlock); 443 } 444 continue; 445 } 446 447 // Specially handle macro-expanded code. 448 if (S->getLocStart().isMacroID()) { 449 count += scanMaybeReachableFromBlock(Block, PP, Reachable); 450 continue; 451 } 452 453 if (isDeadCodeRoot(Block)) { 454 reportDeadCode(Block, S, CB); 455 count += scanMaybeReachableFromBlock(Block, PP, Reachable); 456 } 457 else { 458 // Record this statement as the possibly best location in a 459 // strongly-connected component of dead code for emitting a 460 // warning. 461 DeferredLocs.push_back(std::make_pair(Block, S)); 462 } 463 } 464 465 // If we didn't find a dead root, then report the dead code with the 466 // earliest location. 467 if (!DeferredLocs.empty()) { 468 llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp); 469 for (DeferredLocsTy::iterator I = DeferredLocs.begin(), 470 E = DeferredLocs.end(); I != E; ++I) { 471 const CFGBlock *Block = I->first; 472 if (Reachable[Block->getBlockID()]) 473 continue; 474 reportDeadCode(Block, I->second, CB); 475 count += scanMaybeReachableFromBlock(Block, PP, Reachable); 476 } 477 } 478 479 return count; 480 } 481 482 static SourceLocation GetUnreachableLoc(const Stmt *S, 483 SourceRange &R1, 484 SourceRange &R2) { 485 R1 = R2 = SourceRange(); 486 487 if (const Expr *Ex = dyn_cast<Expr>(S)) 488 S = Ex->IgnoreParenImpCasts(); 489 490 switch (S->getStmtClass()) { 491 case Expr::BinaryOperatorClass: { 492 const BinaryOperator *BO = cast<BinaryOperator>(S); 493 return BO->getOperatorLoc(); 494 } 495 case Expr::UnaryOperatorClass: { 496 const UnaryOperator *UO = cast<UnaryOperator>(S); 497 R1 = UO->getSubExpr()->getSourceRange(); 498 return UO->getOperatorLoc(); 499 } 500 case Expr::CompoundAssignOperatorClass: { 501 const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S); 502 R1 = CAO->getLHS()->getSourceRange(); 503 R2 = CAO->getRHS()->getSourceRange(); 504 return CAO->getOperatorLoc(); 505 } 506 case Expr::BinaryConditionalOperatorClass: 507 case Expr::ConditionalOperatorClass: { 508 const AbstractConditionalOperator *CO = 509 cast<AbstractConditionalOperator>(S); 510 return CO->getQuestionLoc(); 511 } 512 case Expr::MemberExprClass: { 513 const MemberExpr *ME = cast<MemberExpr>(S); 514 R1 = ME->getSourceRange(); 515 return ME->getMemberLoc(); 516 } 517 case Expr::ArraySubscriptExprClass: { 518 const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S); 519 R1 = ASE->getLHS()->getSourceRange(); 520 R2 = ASE->getRHS()->getSourceRange(); 521 return ASE->getRBracketLoc(); 522 } 523 case Expr::CStyleCastExprClass: { 524 const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S); 525 R1 = CSC->getSubExpr()->getSourceRange(); 526 return CSC->getLParenLoc(); 527 } 528 case Expr::CXXFunctionalCastExprClass: { 529 const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S); 530 R1 = CE->getSubExpr()->getSourceRange(); 531 return CE->getLocStart(); 532 } 533 case Stmt::CXXTryStmtClass: { 534 return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc(); 535 } 536 case Expr::ObjCBridgedCastExprClass: { 537 const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S); 538 R1 = CSC->getSubExpr()->getSourceRange(); 539 return CSC->getLParenLoc(); 540 } 541 default: ; 542 } 543 R1 = S->getSourceRange(); 544 return S->getLocStart(); 545 } 546 547 void DeadCodeScan::reportDeadCode(const CFGBlock *B, 548 const Stmt *S, 549 clang::reachable_code::Callback &CB) { 550 // The kind of unreachable code found. 551 reachable_code::UnreachableKind UK = reachable_code::UK_Other; 552 553 do { 554 // Suppress idiomatic cases of calling a noreturn function just 555 // before executing a 'break'. If there is other code after the 'break' 556 // in the block then don't suppress the warning. 557 if (isa<BreakStmt>(S)) { 558 UK = reachable_code::UK_Break; 559 break; 560 } 561 562 if (isTrivialDoWhile(B, S)) 563 return; 564 565 // Suppress trivial 'return' statements that are dead. 566 if (isTrivialReturn(B, S)) { 567 UK = reachable_code::UK_TrivialReturn; 568 break; 569 } 570 571 } while(false); 572 573 SourceRange R1, R2; 574 SourceLocation Loc = GetUnreachableLoc(S, R1, R2); 575 CB.HandleUnreachable(UK, Loc, R1, R2); 576 } 577 578 //===----------------------------------------------------------------------===// 579 // Reachability APIs. 580 //===----------------------------------------------------------------------===// 581 582 namespace clang { namespace reachable_code { 583 584 void Callback::anchor() { } 585 586 unsigned ScanReachableFromBlock(const CFGBlock *Start, 587 llvm::BitVector &Reachable) { 588 return scanFromBlock(Start, Reachable, /* SourceManager* */ 0, false); 589 } 590 591 void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP, 592 Callback &CB) { 593 594 CFG *cfg = AC.getCFG(); 595 if (!cfg) 596 return; 597 598 // Scan for reachable blocks from the entrance of the CFG. 599 // If there are no unreachable blocks, we're done. 600 llvm::BitVector reachable(cfg->getNumBlockIDs()); 601 unsigned numReachable = 602 scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable); 603 if (numReachable == cfg->getNumBlockIDs()) 604 return; 605 606 // If there aren't explicit EH edges, we should include the 'try' dispatch 607 // blocks as roots. 608 if (!AC.getCFGBuildOptions().AddEHEdges) { 609 for (CFG::try_block_iterator I = cfg->try_blocks_begin(), 610 E = cfg->try_blocks_end() ; I != E; ++I) { 611 numReachable += scanMaybeReachableFromBlock(*I, PP, reachable); 612 } 613 if (numReachable == cfg->getNumBlockIDs()) 614 return; 615 } 616 617 // There are some unreachable blocks. We need to find the root blocks that 618 // contain code that should be considered unreachable. 619 for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) { 620 const CFGBlock *block = *I; 621 // A block may have been marked reachable during this loop. 622 if (reachable[block->getBlockID()]) 623 continue; 624 625 DeadCodeScan DS(reachable, PP); 626 numReachable += DS.scanBackwards(block, CB); 627 628 if (numReachable == cfg->getNumBlockIDs()) 629 return; 630 } 631 } 632 633 }} // end namespace clang::reachable_code 634