1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/CalledOnceCheck.h" 10 #include "clang/AST/Attr.h" 11 #include "clang/AST/Decl.h" 12 #include "clang/AST/DeclBase.h" 13 #include "clang/AST/Expr.h" 14 #include "clang/AST/ExprObjC.h" 15 #include "clang/AST/OperationKinds.h" 16 #include "clang/AST/ParentMap.h" 17 #include "clang/AST/RecursiveASTVisitor.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtObjC.h" 20 #include "clang/AST/StmtVisitor.h" 21 #include "clang/AST/Type.h" 22 #include "clang/Analysis/AnalysisDeclContext.h" 23 #include "clang/Analysis/CFG.h" 24 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 25 #include "clang/Basic/Builtins.h" 26 #include "clang/Basic/IdentifierTable.h" 27 #include "clang/Basic/LLVM.h" 28 #include "llvm/ADT/BitVector.h" 29 #include "llvm/ADT/BitmaskEnum.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/PointerIntPair.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/Sequence.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include <memory> 40 41 using namespace clang; 42 43 namespace { 44 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; 45 template <class T> 46 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; 47 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; 48 template <class T> 49 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; 50 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { 51 "completionHandler", "completion", "withCompletionHandler"}; 52 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { 53 "WithCompletionHandler", "WithCompletion"}; 54 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { 55 "error", "cancel", "shouldCall", "done", "OK", "success"}; 56 57 class ParameterStatus { 58 public: 59 // Status kind is basically the main part of parameter's status. 60 // The kind represents our knowledge (so far) about a tracked parameter 61 // in the context of this analysis. 62 // 63 // Since we want to report on missing and extraneous calls, we need to 64 // track the fact whether paramater was called or not. This automatically 65 // decides two kinds: `NotCalled` and `Called`. 66 // 67 // One of the erroneous situations is the case when parameter is called only 68 // on some of the paths. We could've considered it `NotCalled`, but we want 69 // to report double call warnings even if these two calls are not guaranteed 70 // to happen in every execution. We also don't want to have it as `Called` 71 // because not calling tracked parameter on all of the paths is an error 72 // on its own. For these reasons, we need to have a separate kind, 73 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid 74 // confusion. 75 // 76 // Two violations of calling parameter more than once and not calling it on 77 // every path are not, however, mutually exclusive. In situations where both 78 // violations take place, we prefer to report ONLY double call. It's always 79 // harder to pinpoint a bug that has arisen when a user neglects to take the 80 // right action (and therefore, no action is taken), than when a user takes 81 // the wrong action. And, in order to remember that we already reported 82 // a double call, we need another kind: `Reported`. 83 // 84 // Our analysis is intra-procedural and, while in the perfect world, 85 // developers only use tracked parameters to call them, in the real world, 86 // the picture might be different. Parameters can be stored in global 87 // variables or leaked into other functions that we know nothing about. 88 // We try to be lenient and trust users. Another kind `Escaped` reflects 89 // such situations. We don't know if it gets called there or not, but we 90 // should always think of `Escaped` as the best possible option. 91 // 92 // Some of the paths in the analyzed functions might end with a call 93 // to noreturn functions. Such paths are not required to have parameter 94 // calls and we want to track that. For the purposes of better diagnostics, 95 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. 96 // 97 // Additionally, we have `NotVisited` kind that tells us nothing about 98 // a tracked parameter, but is used for tracking analyzed (aka visited) 99 // basic blocks. 100 // 101 // If we consider `|` to be a JOIN operation of two kinds coming from 102 // two different paths, the following properties must hold: 103 // 104 // 1. for any Kind K: K | K == K 105 // Joining two identical kinds should result in the same kind. 106 // 107 // 2. for any Kind K: Reported | K == Reported 108 // Doesn't matter on which path it was reported, it still is. 109 // 110 // 3. for any Kind K: NoReturn | K == K 111 // We can totally ignore noreturn paths during merges. 112 // 113 // 4. DefinitelyCalled | NotCalled == MaybeCalled 114 // Called on one path, not called on another - that's simply 115 // a definition for MaybeCalled. 116 // 117 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: 118 // Escaped | K == K 119 // Escaped mirrors other statuses after joins. 120 // Every situation, when we join any of the listed kinds K, 121 // is a violation. For this reason, in order to assume the 122 // best outcome for this escape, we consider it to be the 123 // same as the other path. 124 // 125 // 6. for any Kind K in [DefinitelyCalled, NotCalled]: 126 // MaybeCalled | K == MaybeCalled 127 // MaybeCalled should basically stay after almost every join. 128 enum Kind { 129 // No-return paths should be absolutely transparent for the analysis. 130 // 0x0 is the identity element for selected join operation (binary or). 131 NoReturn = 0x0, /* 0000 */ 132 // Escaped marks situations when marked parameter escaped into 133 // another function (so we can assume that it was possibly called there). 134 Escaped = 0x1, /* 0001 */ 135 // Parameter was definitely called once at this point. 136 DefinitelyCalled = 0x3, /* 0011 */ 137 // Kinds less or equal to NON_ERROR_STATUS are not considered errors. 138 NON_ERROR_STATUS = DefinitelyCalled, 139 // Parameter was not yet called. 140 NotCalled = 0x5, /* 0101 */ 141 // Parameter was not called at least on one path leading to this point, 142 // while there is also at least one path that it gets called. 143 MaybeCalled = 0x7, /* 0111 */ 144 // Parameter was not yet analyzed. 145 NotVisited = 0x8, /* 1000 */ 146 // We already reported a violation and stopped tracking calls for this 147 // parameter. 148 Reported = 0x15, /* 1111 */ 149 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) 150 }; 151 152 constexpr ParameterStatus() = default; 153 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { 154 assert(!seenAnyCalls(K) && "Can't initialize status without a call"); 155 } 156 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { 157 assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); 158 } 159 160 const Expr &getCall() const { 161 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); 162 return *Call; 163 } 164 static bool seenAnyCalls(Kind K) { 165 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; 166 } 167 bool seenAnyCalls() const { return seenAnyCalls(getKind()); } 168 169 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } 170 bool isErrorStatus() const { return isErrorStatus(getKind()); } 171 172 Kind getKind() const { return StatusKind; } 173 174 void join(const ParameterStatus &Other) { 175 // If we have a pointer already, let's keep it. 176 // For the purposes of the analysis, it doesn't really matter 177 // which call we report. 178 // 179 // If we don't have a pointer, let's take whatever gets joined. 180 if (!Call) { 181 Call = Other.Call; 182 } 183 // Join kinds. 184 StatusKind |= Other.getKind(); 185 } 186 187 bool operator==(const ParameterStatus &Other) const { 188 // We compare only kinds, pointers on their own is only additional 189 // information. 190 return getKind() == Other.getKind(); 191 } 192 193 private: 194 // It would've been a perfect place to use llvm::PointerIntPair, but 195 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. 196 Kind StatusKind = NotVisited; 197 const Expr *Call = nullptr; 198 }; 199 200 /// State aggregates statuses of all tracked parameters. 201 class State { 202 public: 203 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) 204 : ParamData(Size, K) {} 205 206 /// Return status of a parameter with the given index. 207 /// \{ 208 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } 209 const ParameterStatus &getStatusFor(unsigned Index) const { 210 return ParamData[Index]; 211 } 212 /// \} 213 214 /// Return true if parameter with the given index can be called. 215 bool seenAnyCalls(unsigned Index) const { 216 return getStatusFor(Index).seenAnyCalls(); 217 } 218 /// Return a reference that we consider a call. 219 /// 220 /// Should only be used for parameters that can be called. 221 const Expr &getCallFor(unsigned Index) const { 222 return getStatusFor(Index).getCall(); 223 } 224 /// Return status kind of parameter with the given index. 225 ParameterStatus::Kind getKindFor(unsigned Index) const { 226 return getStatusFor(Index).getKind(); 227 } 228 229 bool isVisited() const { 230 return llvm::all_of(ParamData, [](const ParameterStatus &S) { 231 return S.getKind() != ParameterStatus::NotVisited; 232 }); 233 } 234 235 // Join other state into the current state. 236 void join(const State &Other) { 237 assert(ParamData.size() == Other.ParamData.size() && 238 "Couldn't join statuses with different sizes"); 239 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { 240 std::get<0>(Pair).join(std::get<1>(Pair)); 241 } 242 } 243 244 using iterator = ParamSizedVector<ParameterStatus>::iterator; 245 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; 246 247 iterator begin() { return ParamData.begin(); } 248 iterator end() { return ParamData.end(); } 249 250 const_iterator begin() const { return ParamData.begin(); } 251 const_iterator end() const { return ParamData.end(); } 252 253 bool operator==(const State &Other) const { 254 return ParamData == Other.ParamData; 255 } 256 257 private: 258 ParamSizedVector<ParameterStatus> ParamData; 259 }; 260 261 /// A simple class that finds DeclRefExpr in the given expression. 262 /// 263 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever 264 /// expressions on our way. Only certain expressions considered "no-op" 265 /// for our task are indeed skipped. 266 class DeclRefFinder 267 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { 268 public: 269 /// Find a DeclRefExpr in the given expression. 270 /// 271 /// In its most basic form (ShouldRetrieveFromComparisons == false), 272 /// this function can be simply reduced to the following question: 273 /// 274 /// - If expression E is used as a function argument, could we say 275 /// that DeclRefExpr nested in E is used as an argument? 276 /// 277 /// According to this rule, we can say that parens, casts and dereferencing 278 /// (dereferencing only applied to function pointers, but this is our case) 279 /// can be skipped. 280 /// 281 /// When we should look into comparisons the question changes to: 282 /// 283 /// - If expression E is used as a condition, could we say that 284 /// DeclRefExpr is being checked? 285 /// 286 /// And even though, these are two different questions, they have quite a lot 287 /// in common. Actually, we can say that whatever expression answers 288 /// positively the first question also fits the second question as well. 289 /// 290 /// In addition, we skip binary operators == and !=, and unary opeartor !. 291 static const DeclRefExpr *find(const Expr *E, 292 bool ShouldRetrieveFromComparisons = false) { 293 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); 294 } 295 296 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } 297 298 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { 299 switch (UO->getOpcode()) { 300 case UO_LNot: 301 // We care about logical not only if we care about comparisons. 302 if (!ShouldRetrieveFromComparisons) 303 return nullptr; 304 LLVM_FALLTHROUGH; 305 // Function pointer/references can be dereferenced before a call. 306 // That doesn't make it, however, any different from a regular call. 307 // For this reason, dereference operation is a "no-op". 308 case UO_Deref: 309 return Visit(UO->getSubExpr()); 310 default: 311 return nullptr; 312 } 313 } 314 315 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { 316 if (!ShouldRetrieveFromComparisons) 317 return nullptr; 318 319 switch (BO->getOpcode()) { 320 case BO_EQ: 321 case BO_NE: { 322 const DeclRefExpr *LHS = Visit(BO->getLHS()); 323 return LHS ? LHS : Visit(BO->getRHS()); 324 } 325 default: 326 return nullptr; 327 } 328 } 329 330 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { 331 return Visit(OVE->getSourceExpr()); 332 } 333 334 const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { 335 if (!ShouldRetrieveFromComparisons) 336 return nullptr; 337 338 // We want to see through some of the boolean builtin functions 339 // that we are likely to see in conditions. 340 switch (CE->getBuiltinCallee()) { 341 case Builtin::BI__builtin_expect: 342 case Builtin::BI__builtin_expect_with_probability: { 343 assert(CE->getNumArgs() >= 2); 344 345 const DeclRefExpr *Candidate = Visit(CE->getArg(0)); 346 return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); 347 } 348 349 case Builtin::BI__builtin_unpredictable: 350 return Visit(CE->getArg(0)); 351 352 default: 353 return nullptr; 354 } 355 } 356 357 const DeclRefExpr *VisitExpr(const Expr *E) { 358 // It is a fallback method that gets called whenever the actual type 359 // of the given expression is not covered. 360 // 361 // We first check if we have anything to skip. And then repeat the whole 362 // procedure for a nested expression instead. 363 const Expr *DeclutteredExpr = E->IgnoreParenCasts(); 364 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; 365 } 366 367 private: 368 DeclRefFinder(bool ShouldRetrieveFromComparisons) 369 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} 370 371 bool ShouldRetrieveFromComparisons; 372 }; 373 374 const DeclRefExpr *findDeclRefExpr(const Expr *In, 375 bool ShouldRetrieveFromComparisons = false) { 376 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); 377 } 378 379 const ParmVarDecl * 380 findReferencedParmVarDecl(const Expr *In, 381 bool ShouldRetrieveFromComparisons = false) { 382 if (const DeclRefExpr *DR = 383 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { 384 return dyn_cast<ParmVarDecl>(DR->getDecl()); 385 } 386 387 return nullptr; 388 } 389 390 /// Return conditions expression of a statement if it has one. 391 const Expr *getCondition(const Stmt *S) { 392 if (!S) { 393 return nullptr; 394 } 395 396 if (const auto *If = dyn_cast<IfStmt>(S)) { 397 return If->getCond(); 398 } 399 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { 400 return Ternary->getCond(); 401 } 402 403 return nullptr; 404 } 405 406 /// A small helper class that collects all named identifiers in the given 407 /// expression. It traverses it recursively, so names from deeper levels 408 /// of the AST will end up in the results. 409 /// Results might have duplicate names, if this is a problem, convert to 410 /// string sets afterwards. 411 class NamesCollector : public RecursiveASTVisitor<NamesCollector> { 412 public: 413 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; 414 using NameCollection = 415 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; 416 417 static NameCollection collect(const Expr *From) { 418 NamesCollector Impl; 419 Impl.TraverseStmt(const_cast<Expr *>(From)); 420 return Impl.Result; 421 } 422 423 bool VisitDeclRefExpr(const DeclRefExpr *E) { 424 Result.push_back(E->getDecl()->getName()); 425 return true; 426 } 427 428 bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { 429 llvm::StringRef Name; 430 431 if (E->isImplicitProperty()) { 432 ObjCMethodDecl *PropertyMethodDecl = nullptr; 433 if (E->isMessagingGetter()) { 434 PropertyMethodDecl = E->getImplicitPropertyGetter(); 435 } else { 436 PropertyMethodDecl = E->getImplicitPropertySetter(); 437 } 438 assert(PropertyMethodDecl && 439 "Implicit property must have associated declaration"); 440 Name = PropertyMethodDecl->getSelector().getNameForSlot(0); 441 } else { 442 assert(E->isExplicitProperty()); 443 Name = E->getExplicitProperty()->getName(); 444 } 445 446 Result.push_back(Name); 447 return true; 448 } 449 450 private: 451 NamesCollector() = default; 452 NameCollection Result; 453 }; 454 455 /// Check whether the given expression mentions any of conventional names. 456 bool mentionsAnyOfConventionalNames(const Expr *E) { 457 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); 458 459 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { 460 return llvm::any_of( 461 CONVENTIONAL_CONDITIONS, 462 [ConditionName](const llvm::StringLiteral &Conventional) { 463 return ConditionName.contains_lower(Conventional); 464 }); 465 }); 466 } 467 468 /// Clarification is a simple pair of a reason why parameter is not called 469 /// on every path and a statement to blame. 470 struct Clarification { 471 NeverCalledReason Reason; 472 const Stmt *Location; 473 }; 474 475 /// A helper class that can produce a clarification based on the given pair 476 /// of basic blocks. 477 class NotCalledClarifier 478 : public ConstStmtVisitor<NotCalledClarifier, 479 llvm::Optional<Clarification>> { 480 public: 481 /// The main entrypoint for the class, the function that tries to find the 482 /// clarification of how to explain which sub-path starts with a CFG edge 483 /// from Conditional to SuccWithoutCall. 484 /// 485 /// This means that this function has one precondition: 486 /// SuccWithoutCall should be a successor block for Conditional. 487 /// 488 /// Because clarification is not needed for non-trivial pairs of blocks 489 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful 490 /// results only for such cases. For this very reason, the parent basic 491 /// block, Conditional, is named that way, so it is clear what kind of 492 /// block is expected. 493 static llvm::Optional<Clarification> 494 clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) { 495 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { 496 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); 497 } 498 return llvm::None; 499 } 500 501 llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) { 502 return VisitBranchingBlock(If, NeverCalledReason::IfThen); 503 } 504 505 llvm::Optional<Clarification> 506 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { 507 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); 508 } 509 510 llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { 511 const Stmt *CaseToBlame = SuccInQuestion->getLabel(); 512 if (!CaseToBlame) { 513 // If interesting basic block is not labeled, it means that this 514 // basic block does not represent any of the cases. 515 return Clarification{NeverCalledReason::SwitchSkipped, Switch}; 516 } 517 518 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; 519 Case = Case->getNextSwitchCase()) { 520 if (Case == CaseToBlame) { 521 return Clarification{NeverCalledReason::Switch, Case}; 522 } 523 } 524 525 llvm_unreachable("Found unexpected switch structure"); 526 } 527 528 llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) { 529 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); 530 } 531 532 llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) { 533 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); 534 } 535 536 llvm::Optional<Clarification> 537 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { 538 assert(Parent->succ_size() == 2 && 539 "Branching block should have exactly two successors"); 540 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); 541 NeverCalledReason ActualReason = 542 updateForSuccessor(DefaultReason, SuccessorIndex); 543 return Clarification{ActualReason, Terminator}; 544 } 545 546 llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { 547 // We don't want to report on short-curcuit logical operations. 548 return llvm::None; 549 } 550 551 llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) { 552 // If we got here, we didn't have a visit function for more derived 553 // classes of statement that this terminator actually belongs to. 554 // 555 // This is not a good scenario and should not happen in practice, but 556 // at least we'll warn the user. 557 return Clarification{NeverCalledReason::FallbackReason, Terminator}; 558 } 559 560 static unsigned getSuccessorIndex(const CFGBlock *Parent, 561 const CFGBlock *Child) { 562 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); 563 assert(It != Parent->succ_end() && 564 "Given blocks should be in parent-child relationship"); 565 return It - Parent->succ_begin(); 566 } 567 568 static NeverCalledReason 569 updateForSuccessor(NeverCalledReason ReasonForTrueBranch, 570 unsigned SuccessorIndex) { 571 assert(SuccessorIndex <= 1); 572 unsigned RawReason = 573 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; 574 assert(RawReason <= 575 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); 576 return static_cast<NeverCalledReason>(RawReason); 577 } 578 579 private: 580 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) 581 : Parent(Parent), SuccInQuestion(SuccInQuestion) {} 582 583 const CFGBlock *Parent, *SuccInQuestion; 584 }; 585 586 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { 587 public: 588 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 589 bool CheckConventionalParameters) { 590 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); 591 } 592 593 private: 594 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 595 bool CheckConventionalParameters) 596 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), 597 CheckConventionalParameters(CheckConventionalParameters), 598 CurrentState(0) { 599 initDataStructures(); 600 assert((size() == 0 || !States.empty()) && 601 "Data structures are inconsistent"); 602 } 603 604 //===----------------------------------------------------------------------===// 605 // Initializing functions 606 //===----------------------------------------------------------------------===// 607 608 void initDataStructures() { 609 const Decl *AnalyzedDecl = AC.getDecl(); 610 611 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { 612 findParamsToTrack(Function); 613 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { 614 findParamsToTrack(Method); 615 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { 616 findCapturesToTrack(Block); 617 findParamsToTrack(Block); 618 } 619 620 // Have something to track, let's init states for every block from the CFG. 621 if (size() != 0) { 622 States = 623 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); 624 } 625 } 626 627 void findCapturesToTrack(const BlockDecl *Block) { 628 for (const auto &Capture : Block->captures()) { 629 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 630 // Parameter DeclContext is its owning function or method. 631 const DeclContext *ParamContext = P->getDeclContext(); 632 if (shouldBeCalledOnce(ParamContext, P)) { 633 TrackedParams.push_back(P); 634 } 635 } 636 } 637 } 638 639 template <class FunctionLikeDecl> 640 void findParamsToTrack(const FunctionLikeDecl *Function) { 641 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { 642 if (shouldBeCalledOnce(Function, Index)) { 643 TrackedParams.push_back(Function->getParamDecl(Index)); 644 } 645 } 646 } 647 648 //===----------------------------------------------------------------------===// 649 // Main logic 'check' functions 650 //===----------------------------------------------------------------------===// 651 652 void check() { 653 // Nothing to check here: we don't have marked parameters. 654 if (size() == 0 || isPossiblyEmptyImpl()) 655 return; 656 657 assert( 658 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && 659 "None of the blocks should be 'visited' before the analysis"); 660 661 // For our task, both backward and forward approaches suite well. 662 // However, in order to report better diagnostics, we decided to go with 663 // backward analysis. 664 // 665 // Let's consider the following CFG and how forward and backward analyses 666 // will work for it. 667 // 668 // FORWARD: | BACKWARD: 669 // #1 | #1 670 // +---------+ | +-----------+ 671 // | if | | |MaybeCalled| 672 // +---------+ | +-----------+ 673 // |NotCalled| | | if | 674 // +---------+ | +-----------+ 675 // / \ | / \ 676 // #2 / \ #3 | #2 / \ #3 677 // +----------------+ +---------+ | +----------------+ +---------+ 678 // | foo() | | ... | | |DefinitelyCalled| |NotCalled| 679 // +----------------+ +---------+ | +----------------+ +---------+ 680 // |DefinitelyCalled| |NotCalled| | | foo() | | ... | 681 // +----------------+ +---------+ | +----------------+ +---------+ 682 // \ / | \ / 683 // \ #4 / | \ #4 / 684 // +-----------+ | +---------+ 685 // | ... | | |NotCalled| 686 // +-----------+ | +---------+ 687 // |MaybeCalled| | | ... | 688 // +-----------+ | +---------+ 689 // 690 // The most natural way to report lacking call in the block #3 would be to 691 // message that the false branch of the if statement in the block #1 doesn't 692 // have a call. And while with the forward approach we'll need to find a 693 // least common ancestor or something like that to find the 'if' to blame, 694 // backward analysis gives it to us out of the box. 695 BackwardDataflowWorklist Worklist(FunctionCFG, AC); 696 697 // Let's visit EXIT. 698 const CFGBlock *Exit = &FunctionCFG.getExit(); 699 assignState(Exit, State(size(), ParameterStatus::NotCalled)); 700 Worklist.enqueuePredecessors(Exit); 701 702 while (const CFGBlock *BB = Worklist.dequeue()) { 703 assert(BB && "Worklist should filter out null blocks"); 704 check(BB); 705 assert(CurrentState.isVisited() && 706 "After the check, basic block should be visited"); 707 708 // Traverse successor basic blocks if the status of this block 709 // has changed. 710 if (assignState(BB, CurrentState)) { 711 Worklist.enqueuePredecessors(BB); 712 } 713 } 714 715 // Check that we have all tracked parameters at the last block. 716 // As we are performing a backward version of the analysis, 717 // it should be the ENTRY block. 718 checkEntry(&FunctionCFG.getEntry()); 719 } 720 721 void check(const CFGBlock *BB) { 722 // We start with a state 'inherited' from all the successors. 723 CurrentState = joinSuccessors(BB); 724 assert(CurrentState.isVisited() && 725 "Shouldn't start with a 'not visited' state"); 726 727 // This is the 'exit' situation, broken promises are probably OK 728 // in such scenarios. 729 if (BB->hasNoReturnElement()) { 730 markNoReturn(); 731 // This block still can have calls (even multiple calls) and 732 // for this reason there is no early return here. 733 } 734 735 // We use a backward dataflow propagation and for this reason we 736 // should traverse basic blocks bottom-up. 737 for (const CFGElement &Element : llvm::reverse(*BB)) { 738 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 739 check(S->getStmt()); 740 } 741 } 742 } 743 void check(const Stmt *S) { Visit(S); } 744 745 void checkEntry(const CFGBlock *Entry) { 746 // We finalize this algorithm with the ENTRY block because 747 // we use a backward version of the analysis. This is where 748 // we can judge that some of the tracked parameters are not called on 749 // every path from ENTRY to EXIT. 750 751 const State &EntryStatus = getState(Entry); 752 llvm::BitVector NotCalledOnEveryPath(size(), false); 753 llvm::BitVector NotUsedOnEveryPath(size(), false); 754 755 // Check if there are no calls of the marked parameter at all 756 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { 757 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 758 759 switch (IndexedStatus.value().getKind()) { 760 case ParameterStatus::NotCalled: 761 // If there were places where this parameter escapes (aka being used), 762 // we can provide a more useful diagnostic by pointing at the exact 763 // branches where it is not even mentioned. 764 if (!hasEverEscaped(IndexedStatus.index())) { 765 // This parameter is was not used at all, so we should report the 766 // most generic version of the warning. 767 if (isCaptured(Parameter)) { 768 // We want to specify that it was captured by the block. 769 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), 770 !isExplicitlyMarked(Parameter)); 771 } else { 772 Handler.handleNeverCalled(Parameter, 773 !isExplicitlyMarked(Parameter)); 774 } 775 } else { 776 // Mark it as 'interesting' to figure out which paths don't even 777 // have escapes. 778 NotUsedOnEveryPath[IndexedStatus.index()] = true; 779 } 780 781 break; 782 case ParameterStatus::MaybeCalled: 783 // If we have 'maybe called' at this point, we have an error 784 // that there is at least one path where this parameter 785 // is not called. 786 // 787 // However, reporting the warning with only that information can be 788 // too vague for the users. For this reason, we mark such parameters 789 // as "interesting" for further analysis. 790 NotCalledOnEveryPath[IndexedStatus.index()] = true; 791 break; 792 default: 793 break; 794 } 795 } 796 797 // Early exit if we don't have parameters for extra analysis. 798 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none()) 799 return; 800 801 // We are looking for a pair of blocks A, B so that the following is true: 802 // * A is a predecessor of B 803 // * B is marked as NotCalled 804 // * A has at least one successor marked as either 805 // Escaped or DefinitelyCalled 806 // 807 // In that situation, it is guaranteed that B is the first block of the path 808 // where the user doesn't call or use parameter in question. 809 // 810 // For this reason, branch A -> B can be used for reporting. 811 // 812 // This part of the algorithm is guarded by a condition that the function 813 // does indeed have a violation of contract. For this reason, we can 814 // spend more time to find a good spot to place the warning. 815 // 816 // The following algorithm has the worst case complexity of O(V + E), 817 // where V is the number of basic blocks in FunctionCFG, 818 // E is the number of edges between blocks in FunctionCFG. 819 for (const CFGBlock *BB : FunctionCFG) { 820 if (!BB) 821 continue; 822 823 const State &BlockState = getState(BB); 824 825 for (unsigned Index : llvm::seq(0u, size())) { 826 // We don't want to use 'isLosingCall' here because we want to report 827 // the following situation as well: 828 // 829 // MaybeCalled 830 // | ... | 831 // MaybeCalled NotCalled 832 // 833 // Even though successor is not 'DefinitelyCalled', it is still useful 834 // to report it, it is still a path without a call. 835 if (NotCalledOnEveryPath[Index] && 836 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { 837 838 findAndReportNotCalledBranches(BB, Index); 839 } else if (NotUsedOnEveryPath[Index] && 840 isLosingEscape(BlockState, BB, Index)) { 841 842 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); 843 } 844 } 845 } 846 } 847 848 /// Check potential call of a tracked parameter. 849 void checkDirectCall(const CallExpr *Call) { 850 if (auto Index = getIndexOfCallee(Call)) { 851 processCallFor(*Index, Call); 852 } 853 } 854 855 /// Check the call expression for being an indirect call of one of the tracked 856 /// parameters. It is indirect in the sense that this particular call is not 857 /// calling the parameter itself, but rather uses it as the argument. 858 template <class CallLikeExpr> 859 void checkIndirectCall(const CallLikeExpr *CallOrMessage) { 860 // CallExpr::arguments does not interact nicely with llvm::enumerate. 861 llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef( 862 CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 863 864 // Let's check if any of the call arguments is a point of interest. 865 for (const auto &Argument : llvm::enumerate(Arguments)) { 866 if (auto Index = getIndexOfExpression(Argument.value())) { 867 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 868 869 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { 870 // If the corresponding parameter is marked as 'called_once' we should 871 // consider it as a call. 872 processCallFor(*Index, CallOrMessage); 873 } else if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { 874 // Otherwise, we mark this parameter as escaped, which can be 875 // interpreted both as called or not called depending on the context. 876 CurrentParamStatus = ParameterStatus::Escaped; 877 } 878 // Otherwise, let's keep the state as it is. 879 } 880 } 881 } 882 883 /// Process call of the parameter with the given index 884 void processCallFor(unsigned Index, const Expr *Call) { 885 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 886 887 if (CurrentParamStatus.seenAnyCalls()) { 888 889 // At this point, this parameter was called, so this is a second call. 890 const ParmVarDecl *Parameter = getParameter(Index); 891 Handler.handleDoubleCall( 892 Parameter, &CurrentState.getCallFor(Index), Call, 893 !isExplicitlyMarked(Parameter), 894 // We are sure that the second call is definitely 895 // going to happen if the status is 'DefinitelyCalled'. 896 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); 897 898 // Mark this parameter as already reported on, so we don't repeat 899 // warnings. 900 CurrentParamStatus = ParameterStatus::Reported; 901 902 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { 903 // If we didn't report anything yet, let's mark this parameter 904 // as called. 905 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); 906 CurrentParamStatus = Called; 907 } 908 } 909 910 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, 911 bool IsEscape = false) { 912 for (const CFGBlock *Succ : Parent->succs()) { 913 if (!Succ) 914 continue; 915 916 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { 917 assert(Parent->succ_size() >= 2 && 918 "Block should have at least two successors at this point"); 919 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { 920 const ParmVarDecl *Parameter = getParameter(Index); 921 Handler.handleNeverCalled(Parameter, Clarification->Location, 922 Clarification->Reason, !IsEscape, 923 !isExplicitlyMarked(Parameter)); 924 } 925 } 926 } 927 } 928 929 //===----------------------------------------------------------------------===// 930 // Predicate functions to check parameters 931 //===----------------------------------------------------------------------===// 932 933 /// Return true if parameter is explicitly marked as 'called_once'. 934 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { 935 return Parameter->hasAttr<CalledOnceAttr>(); 936 } 937 938 /// Return true if the given name matches conventional pattens. 939 static bool isConventional(llvm::StringRef Name) { 940 return llvm::count(CONVENTIONAL_NAMES, Name) != 0; 941 } 942 943 /// Return true if the given name has conventional suffixes. 944 static bool hasConventionalSuffix(llvm::StringRef Name) { 945 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { 946 return Name.endswith(Suffix); 947 }); 948 } 949 950 /// Return true if the given type can be used for conventional parameters. 951 static bool isConventional(QualType Ty) { 952 if (!Ty->isBlockPointerType()) { 953 return false; 954 } 955 956 QualType BlockType = Ty->getAs<BlockPointerType>()->getPointeeType(); 957 // Completion handlers should have a block type with void return type. 958 return BlockType->getAs<FunctionType>()->getReturnType()->isVoidType(); 959 } 960 961 /// Return true if the only parameter of the function is conventional. 962 static bool isOnlyParameterConventional(const FunctionDecl *Function) { 963 IdentifierInfo *II = Function->getIdentifier(); 964 return Function->getNumParams() == 1 && II && 965 hasConventionalSuffix(II->getName()); 966 } 967 968 /// Return true/false if 'swift_async' attribute states that the given 969 /// parameter is conventionally called once. 970 /// Return llvm::None if the given declaration doesn't have 'swift_async' 971 /// attribute. 972 static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D, 973 unsigned ParamIndex) { 974 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { 975 if (A->getKind() == SwiftAsyncAttr::None) { 976 return false; 977 } 978 979 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; 980 } 981 return llvm::None; 982 } 983 984 /// Return true if the specified selector piece matches conventions. 985 static bool isConventionalSelectorPiece(Selector MethodSelector, 986 unsigned PieceIndex, 987 QualType PieceType) { 988 if (!isConventional(PieceType)) { 989 return false; 990 } 991 992 if (MethodSelector.getNumArgs() == 1) { 993 assert(PieceIndex == 0); 994 return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); 995 } 996 997 return isConventional(MethodSelector.getNameForSlot(PieceIndex)); 998 } 999 1000 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { 1001 return isExplicitlyMarked(Parameter) || 1002 (CheckConventionalParameters && 1003 isConventional(Parameter->getName()) && 1004 isConventional(Parameter->getType())); 1005 } 1006 1007 bool shouldBeCalledOnce(const DeclContext *ParamContext, 1008 const ParmVarDecl *Param) { 1009 unsigned ParamIndex = Param->getFunctionScopeIndex(); 1010 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { 1011 return shouldBeCalledOnce(Function, ParamIndex); 1012 } 1013 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { 1014 return shouldBeCalledOnce(Method, ParamIndex); 1015 } 1016 return shouldBeCalledOnce(Param); 1017 } 1018 1019 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { 1020 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); 1021 } 1022 1023 bool shouldBeCalledOnce(const FunctionDecl *Function, 1024 unsigned ParamIndex) const { 1025 if (ParamIndex >= Function->getNumParams()) { 1026 return false; 1027 } 1028 // 'swift_async' goes first and overrides anything else. 1029 if (auto ConventionalAsync = 1030 isConventionalSwiftAsync(Function, ParamIndex)) { 1031 return ConventionalAsync.getValue(); 1032 } 1033 1034 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || 1035 (CheckConventionalParameters && 1036 isOnlyParameterConventional(Function)); 1037 } 1038 1039 bool shouldBeCalledOnce(const ObjCMethodDecl *Method, 1040 unsigned ParamIndex) const { 1041 Selector MethodSelector = Method->getSelector(); 1042 if (ParamIndex >= MethodSelector.getNumArgs()) { 1043 return false; 1044 } 1045 1046 // 'swift_async' goes first and overrides anything else. 1047 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { 1048 return ConventionalAsync.getValue(); 1049 } 1050 1051 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); 1052 return shouldBeCalledOnce(Parameter) || 1053 (CheckConventionalParameters && 1054 isConventionalSelectorPiece(MethodSelector, ParamIndex, 1055 Parameter->getType())); 1056 } 1057 1058 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { 1059 const FunctionDecl *Function = Call->getDirectCallee(); 1060 return Function && shouldBeCalledOnce(Function, ParamIndex); 1061 } 1062 1063 bool shouldBeCalledOnce(const ObjCMessageExpr *Message, 1064 unsigned ParamIndex) const { 1065 const ObjCMethodDecl *Method = Message->getMethodDecl(); 1066 return Method && ParamIndex < Method->param_size() && 1067 shouldBeCalledOnce(Method, ParamIndex); 1068 } 1069 1070 //===----------------------------------------------------------------------===// 1071 // Utility methods 1072 //===----------------------------------------------------------------------===// 1073 1074 bool isCaptured(const ParmVarDecl *Parameter) const { 1075 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { 1076 return Block->capturesVariable(Parameter); 1077 } 1078 return false; 1079 } 1080 1081 /// Return true if the analyzed function is actually a default implementation 1082 /// of the method that has to be overriden. 1083 /// 1084 /// These functions can have tracked parameters, but wouldn't call them 1085 /// because they are not designed to perform any meaningful actions. 1086 /// 1087 /// There are a couple of flavors of such default implementations: 1088 /// 1. Empty methods or methods with a single return statement 1089 /// 2. Methods that have one block with a call to no return function 1090 /// 3. Methods with only assertion-like operations 1091 bool isPossiblyEmptyImpl() const { 1092 if (!isa<ObjCMethodDecl>(AC.getDecl())) { 1093 // We care only about functions that are not supposed to be called. 1094 // Only methods can be overriden. 1095 return false; 1096 } 1097 1098 // Case #1 (without return statements) 1099 if (FunctionCFG.size() == 2) { 1100 // Method has only two blocks: ENTRY and EXIT. 1101 // This is equivalent to empty function. 1102 return true; 1103 } 1104 1105 // Case #2 1106 if (FunctionCFG.size() == 3) { 1107 const CFGBlock &Entry = FunctionCFG.getEntry(); 1108 if (Entry.succ_empty()) { 1109 return false; 1110 } 1111 1112 const CFGBlock *OnlyBlock = *Entry.succ_begin(); 1113 // Method has only one block, let's see if it has a no-return 1114 // element. 1115 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { 1116 return true; 1117 } 1118 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. 1119 } 1120 1121 // Cases #1 (return statements) and #3. 1122 // 1123 // It is hard to detect that something is an assertion or came 1124 // from assertion. Here we use a simple heuristic: 1125 // 1126 // - If it came from a macro, it can be an assertion. 1127 // 1128 // Additionally, we can't assume a number of basic blocks or the CFG's 1129 // structure because assertions might include loops and conditions. 1130 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { 1131 if (!BB) { 1132 // Unreachable blocks are totally fine. 1133 return true; 1134 } 1135 1136 // Return statements can have sub-expressions that are represented as 1137 // separate statements of a basic block. We should allow this. 1138 // This parent map will be initialized with a parent tree for all 1139 // subexpressions of the block's return statement (if it has one). 1140 std::unique_ptr<ParentMap> ReturnChildren; 1141 1142 return llvm::all_of( 1143 llvm::reverse(*BB), // we should start with return statements, if we 1144 // have any, i.e. from the bottom of the block 1145 [&ReturnChildren](const CFGElement &Element) { 1146 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 1147 const Stmt *SuspiciousStmt = S->getStmt(); 1148 1149 if (isa<ReturnStmt>(SuspiciousStmt)) { 1150 // Let's initialize this structure to test whether 1151 // some further statement is a part of this return. 1152 ReturnChildren = std::make_unique<ParentMap>( 1153 const_cast<Stmt *>(SuspiciousStmt)); 1154 // Return statements are allowed as part of #1. 1155 return true; 1156 } 1157 1158 return SuspiciousStmt->getBeginLoc().isMacroID() || 1159 (ReturnChildren && 1160 ReturnChildren->hasParent(SuspiciousStmt)); 1161 } 1162 return true; 1163 }); 1164 }); 1165 } 1166 1167 /// Check if parameter with the given index has ever escaped. 1168 bool hasEverEscaped(unsigned Index) const { 1169 return llvm::any_of(States, [Index](const State &StateForOneBB) { 1170 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; 1171 }); 1172 } 1173 1174 /// Return status stored for the given basic block. 1175 /// \{ 1176 State &getState(const CFGBlock *BB) { 1177 assert(BB); 1178 return States[BB->getBlockID()]; 1179 } 1180 const State &getState(const CFGBlock *BB) const { 1181 assert(BB); 1182 return States[BB->getBlockID()]; 1183 } 1184 /// \} 1185 1186 /// Assign status to the given basic block. 1187 /// 1188 /// Returns true when the stored status changed. 1189 bool assignState(const CFGBlock *BB, const State &ToAssign) { 1190 State &Current = getState(BB); 1191 if (Current == ToAssign) { 1192 return false; 1193 } 1194 1195 Current = ToAssign; 1196 return true; 1197 } 1198 1199 /// Join all incoming statuses for the given basic block. 1200 State joinSuccessors(const CFGBlock *BB) const { 1201 auto Succs = 1202 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { 1203 return Succ && this->getState(Succ).isVisited(); 1204 }); 1205 // We came to this block from somewhere after all. 1206 assert(!Succs.empty() && 1207 "Basic block should have at least one visited successor"); 1208 1209 State Result = getState(*Succs.begin()); 1210 1211 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { 1212 Result.join(getState(Succ)); 1213 } 1214 1215 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { 1216 handleConditional(BB, Condition, Result); 1217 } 1218 1219 return Result; 1220 } 1221 1222 void handleConditional(const CFGBlock *BB, const Expr *Condition, 1223 State &ToAlter) const { 1224 handleParameterCheck(BB, Condition, ToAlter); 1225 if (SuppressOnConventionalErrorPaths) { 1226 handleConventionalCheck(BB, Condition, ToAlter); 1227 } 1228 } 1229 1230 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, 1231 State &ToAlter) const { 1232 // In this function, we try to deal with the following pattern: 1233 // 1234 // if (parameter) 1235 // parameter(...); 1236 // 1237 // It's not good to show a warning here because clearly 'parameter' 1238 // couldn't and shouldn't be called on the 'else' path. 1239 // 1240 // Let's check if this if statement has a check involving one of 1241 // the tracked parameters. 1242 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( 1243 Condition, 1244 /* ShouldRetrieveFromComparisons = */ true)) { 1245 if (const auto Index = getIndex(*Parameter)) { 1246 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); 1247 1248 // We don't want to deep dive into semantics of the check and 1249 // figure out if that check was for null or something else. 1250 // We simply trust the user that they know what they are doing. 1251 // 1252 // For this reason, in the following loop we look for the 1253 // best-looking option. 1254 for (const CFGBlock *Succ : BB->succs()) { 1255 if (!Succ) 1256 continue; 1257 1258 const ParameterStatus &StatusInSucc = 1259 getState(Succ).getStatusFor(*Index); 1260 1261 if (StatusInSucc.isErrorStatus()) { 1262 continue; 1263 } 1264 1265 // Let's use this status instead. 1266 CurrentStatus = StatusInSucc; 1267 1268 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { 1269 // This is the best option to have and we already found it. 1270 break; 1271 } 1272 1273 // If we found 'Escaped' first, we still might find 'DefinitelyCalled' 1274 // on the other branch. And we prefer the latter. 1275 } 1276 } 1277 } 1278 } 1279 1280 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, 1281 State &ToAlter) const { 1282 // Even when the analysis is technically correct, it is a widespread pattern 1283 // not to call completion handlers in some scenarios. These usually have 1284 // typical conditional names, such as 'error' or 'cancel'. 1285 if (!mentionsAnyOfConventionalNames(Condition)) { 1286 return; 1287 } 1288 1289 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { 1290 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 1291 // Conventions do not apply to explicitly marked parameters. 1292 if (isExplicitlyMarked(Parameter)) { 1293 continue; 1294 } 1295 1296 ParameterStatus &CurrentStatus = IndexedStatus.value(); 1297 // If we did find that on one of the branches the user uses the callback 1298 // and doesn't on the other path, we believe that they know what they are 1299 // doing and trust them. 1300 // 1301 // There are two possible scenarios for that: 1302 // 1. Current status is 'MaybeCalled' and one of the branches is 1303 // 'DefinitelyCalled' 1304 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' 1305 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || 1306 isLosingEscape(ToAlter, BB, IndexedStatus.index())) { 1307 CurrentStatus = ParameterStatus::Escaped; 1308 } 1309 } 1310 } 1311 1312 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1313 unsigned ParameterIndex) const { 1314 // Let's check if the block represents DefinitelyCalled -> MaybeCalled 1315 // transition. 1316 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1317 ParameterStatus::MaybeCalled, 1318 ParameterStatus::DefinitelyCalled); 1319 } 1320 1321 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1322 unsigned ParameterIndex) const { 1323 // Let's check if the block represents Escaped -> NotCalled transition. 1324 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1325 ParameterStatus::NotCalled, ParameterStatus::Escaped); 1326 } 1327 1328 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1329 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, 1330 ParameterStatus::Kind BeforeJoin) const { 1331 assert(!ParameterStatus::isErrorStatus(BeforeJoin) && 1332 ParameterStatus::isErrorStatus(AfterJoin) && 1333 "It's not a losing join if statuses do not represent " 1334 "correct-to-error transition"); 1335 1336 const ParameterStatus &CurrentStatus = 1337 StateAfterJoin.getStatusFor(ParameterIndex); 1338 1339 return CurrentStatus.getKind() == AfterJoin && 1340 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); 1341 } 1342 1343 /// Return true if any of the successors of the given basic block has 1344 /// a specified status for the given parameter. 1345 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, 1346 ParameterStatus::Kind ToFind) const { 1347 return llvm::any_of( 1348 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { 1349 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; 1350 }); 1351 } 1352 1353 /// Check given expression that was discovered to escape. 1354 void checkEscapee(const Expr *E) { 1355 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1356 checkEscapee(*Parameter); 1357 } 1358 } 1359 1360 /// Check given parameter that was discovered to escape. 1361 void checkEscapee(const ParmVarDecl &Parameter) { 1362 if (auto Index = getIndex(Parameter)) { 1363 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1364 1365 if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { 1366 CurrentParamStatus = ParameterStatus::Escaped; 1367 } 1368 } 1369 } 1370 1371 /// Mark all parameters in the current state as 'no-return'. 1372 void markNoReturn() { 1373 for (ParameterStatus &PS : CurrentState) { 1374 PS = ParameterStatus::NoReturn; 1375 } 1376 } 1377 1378 /// Check if the given assignment represents suppression and act on it. 1379 void checkSuppression(const BinaryOperator *Assignment) { 1380 // Suppression has the following form: 1381 // parameter = 0; 1382 // 0 can be of any form (NULL, nil, etc.) 1383 if (auto Index = getIndexOfExpression(Assignment->getLHS())) { 1384 1385 // We don't care what is written in the RHS, it could be whatever 1386 // we can interpret as 0. 1387 if (auto Constant = 1388 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( 1389 AC.getASTContext())) { 1390 1391 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1392 1393 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { 1394 // Even though this suppression mechanism is introduced to tackle 1395 // false positives for multiple calls, the fact that the user has 1396 // to use suppression can also tell us that we couldn't figure out 1397 // how different paths cancel each other out. And if that is true, 1398 // we will most certainly have false positives about parameters not 1399 // being called on certain paths. 1400 // 1401 // For this reason, we abandon tracking this parameter altogether. 1402 CurrentParamStatus = ParameterStatus::Reported; 1403 } 1404 } 1405 } 1406 } 1407 1408 public: 1409 //===----------------------------------------------------------------------===// 1410 // Tree traversal methods 1411 //===----------------------------------------------------------------------===// 1412 1413 void VisitCallExpr(const CallExpr *Call) { 1414 // This call might be a direct call, i.e. a parameter call... 1415 checkDirectCall(Call); 1416 // ... or an indirect call, i.e. when parameter is an argument. 1417 checkIndirectCall(Call); 1418 } 1419 1420 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { 1421 // The most common situation that we are defending against here is 1422 // copying a tracked parameter. 1423 if (const Expr *Receiver = Message->getInstanceReceiver()) { 1424 checkEscapee(Receiver); 1425 } 1426 // Message expressions unlike calls, could not be direct. 1427 checkIndirectCall(Message); 1428 } 1429 1430 void VisitBlockExpr(const BlockExpr *Block) { 1431 for (const auto &Capture : Block->getBlockDecl()->captures()) { 1432 // If a block captures a tracked parameter, it should be 1433 // considered escaped. 1434 // On one hand, blocks that do that should definitely call it on 1435 // every path. However, it is not guaranteed that the block 1436 // itself gets called whenever it gets created. 1437 // 1438 // Because we don't want to track blocks and whether they get called, 1439 // we consider such parameters simply escaped. 1440 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 1441 checkEscapee(*Param); 1442 } 1443 } 1444 } 1445 1446 void VisitBinaryOperator(const BinaryOperator *Op) { 1447 if (Op->getOpcode() == clang::BO_Assign) { 1448 // Let's check if one of the tracked parameters is assigned into 1449 // something, and if it is we don't want to track extra variables, so we 1450 // consider it as an escapee. 1451 checkEscapee(Op->getRHS()); 1452 1453 // Let's check whether this assignment is a suppression. 1454 checkSuppression(Op); 1455 } 1456 } 1457 1458 void VisitDeclStmt(const DeclStmt *DS) { 1459 // Variable initialization is not assignment and should be handled 1460 // separately. 1461 // 1462 // Multiple declarations can be a part of declaration statement. 1463 for (const auto *Declaration : DS->getDeclGroup()) { 1464 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { 1465 if (Var->getInit()) { 1466 checkEscapee(Var->getInit()); 1467 } 1468 } 1469 } 1470 } 1471 1472 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { 1473 // We consider '(void)parameter' as a manual no-op escape. 1474 // It should be used to explicitly tell the analysis that this parameter 1475 // is intentionally not called on this path. 1476 if (Cast->getType().getCanonicalType()->isVoidType()) { 1477 checkEscapee(Cast->getSubExpr()); 1478 } 1479 } 1480 1481 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { 1482 // It is OK not to call marked parameters on exceptional paths. 1483 markNoReturn(); 1484 } 1485 1486 private: 1487 unsigned size() const { return TrackedParams.size(); } 1488 1489 llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { 1490 return getIndexOfExpression(Call->getCallee()); 1491 } 1492 1493 llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const { 1494 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1495 return getIndex(*Parameter); 1496 } 1497 1498 return llvm::None; 1499 } 1500 1501 llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { 1502 // Expected number of parameters that we actually track is 1. 1503 // 1504 // Also, the maximum number of declared parameters could not be on a scale 1505 // of hundreds of thousands. 1506 // 1507 // In this setting, linear search seems reasonable and even performs better 1508 // than bisection. 1509 ParamSizedVector<const ParmVarDecl *>::const_iterator It = 1510 llvm::find(TrackedParams, &Parameter); 1511 1512 if (It != TrackedParams.end()) { 1513 return It - TrackedParams.begin(); 1514 } 1515 1516 return llvm::None; 1517 } 1518 1519 const ParmVarDecl *getParameter(unsigned Index) const { 1520 assert(Index < TrackedParams.size()); 1521 return TrackedParams[Index]; 1522 } 1523 1524 const CFG &FunctionCFG; 1525 AnalysisDeclContext &AC; 1526 CalledOnceCheckHandler &Handler; 1527 bool CheckConventionalParameters; 1528 // As of now, we turn this behavior off. So, we still are going to report 1529 // missing calls on paths that look like it was intentional. 1530 // Technically such reports are true positives, but they can make some users 1531 // grumpy because of the sheer number of warnings. 1532 // It can be turned back on if we decide that we want to have the other way 1533 // around. 1534 bool SuppressOnConventionalErrorPaths = false; 1535 1536 State CurrentState; 1537 ParamSizedVector<const ParmVarDecl *> TrackedParams; 1538 CFGSizedVector<State> States; 1539 }; 1540 1541 } // end anonymous namespace 1542 1543 namespace clang { 1544 void checkCalledOnceParameters(AnalysisDeclContext &AC, 1545 CalledOnceCheckHandler &Handler, 1546 bool CheckConventionalParameters) { 1547 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); 1548 } 1549 } // end namespace clang 1550