1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/CalledOnceCheck.h" 10 #include "clang/AST/Attr.h" 11 #include "clang/AST/Decl.h" 12 #include "clang/AST/DeclBase.h" 13 #include "clang/AST/Expr.h" 14 #include "clang/AST/ExprObjC.h" 15 #include "clang/AST/OperationKinds.h" 16 #include "clang/AST/ParentMap.h" 17 #include "clang/AST/RecursiveASTVisitor.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtObjC.h" 20 #include "clang/AST/StmtVisitor.h" 21 #include "clang/AST/Type.h" 22 #include "clang/Analysis/AnalysisDeclContext.h" 23 #include "clang/Analysis/CFG.h" 24 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 25 #include "clang/Basic/Builtins.h" 26 #include "clang/Basic/IdentifierTable.h" 27 #include "clang/Basic/LLVM.h" 28 #include "llvm/ADT/BitVector.h" 29 #include "llvm/ADT/BitmaskEnum.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/PointerIntPair.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/Sequence.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include <memory> 40 41 using namespace clang; 42 43 namespace { 44 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; 45 template <class T> 46 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; 47 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; 48 template <class T> 49 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; 50 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { 51 "completionHandler", "completion", "withCompletionHandler", 52 "withCompletion", "completionBlock", "withCompletionBlock", 53 "replyTo", "reply", "withReplyTo"}; 54 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { 55 "WithCompletionHandler", "WithCompletion", "WithCompletionBlock", 56 "WithReplyTo", "WithReply"}; 57 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { 58 "error", "cancel", "shouldCall", "done", "OK", "success"}; 59 60 class ParameterStatus { 61 public: 62 // Status kind is basically the main part of parameter's status. 63 // The kind represents our knowledge (so far) about a tracked parameter 64 // in the context of this analysis. 65 // 66 // Since we want to report on missing and extraneous calls, we need to 67 // track the fact whether paramater was called or not. This automatically 68 // decides two kinds: `NotCalled` and `Called`. 69 // 70 // One of the erroneous situations is the case when parameter is called only 71 // on some of the paths. We could've considered it `NotCalled`, but we want 72 // to report double call warnings even if these two calls are not guaranteed 73 // to happen in every execution. We also don't want to have it as `Called` 74 // because not calling tracked parameter on all of the paths is an error 75 // on its own. For these reasons, we need to have a separate kind, 76 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid 77 // confusion. 78 // 79 // Two violations of calling parameter more than once and not calling it on 80 // every path are not, however, mutually exclusive. In situations where both 81 // violations take place, we prefer to report ONLY double call. It's always 82 // harder to pinpoint a bug that has arisen when a user neglects to take the 83 // right action (and therefore, no action is taken), than when a user takes 84 // the wrong action. And, in order to remember that we already reported 85 // a double call, we need another kind: `Reported`. 86 // 87 // Our analysis is intra-procedural and, while in the perfect world, 88 // developers only use tracked parameters to call them, in the real world, 89 // the picture might be different. Parameters can be stored in global 90 // variables or leaked into other functions that we know nothing about. 91 // We try to be lenient and trust users. Another kind `Escaped` reflects 92 // such situations. We don't know if it gets called there or not, but we 93 // should always think of `Escaped` as the best possible option. 94 // 95 // Some of the paths in the analyzed functions might end with a call 96 // to noreturn functions. Such paths are not required to have parameter 97 // calls and we want to track that. For the purposes of better diagnostics, 98 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. 99 // 100 // Additionally, we have `NotVisited` kind that tells us nothing about 101 // a tracked parameter, but is used for tracking analyzed (aka visited) 102 // basic blocks. 103 // 104 // If we consider `|` to be a JOIN operation of two kinds coming from 105 // two different paths, the following properties must hold: 106 // 107 // 1. for any Kind K: K | K == K 108 // Joining two identical kinds should result in the same kind. 109 // 110 // 2. for any Kind K: Reported | K == Reported 111 // Doesn't matter on which path it was reported, it still is. 112 // 113 // 3. for any Kind K: NoReturn | K == K 114 // We can totally ignore noreturn paths during merges. 115 // 116 // 4. DefinitelyCalled | NotCalled == MaybeCalled 117 // Called on one path, not called on another - that's simply 118 // a definition for MaybeCalled. 119 // 120 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: 121 // Escaped | K == K 122 // Escaped mirrors other statuses after joins. 123 // Every situation, when we join any of the listed kinds K, 124 // is a violation. For this reason, in order to assume the 125 // best outcome for this escape, we consider it to be the 126 // same as the other path. 127 // 128 // 6. for any Kind K in [DefinitelyCalled, NotCalled]: 129 // MaybeCalled | K == MaybeCalled 130 // MaybeCalled should basically stay after almost every join. 131 enum Kind { 132 // No-return paths should be absolutely transparent for the analysis. 133 // 0x0 is the identity element for selected join operation (binary or). 134 NoReturn = 0x0, /* 0000 */ 135 // Escaped marks situations when marked parameter escaped into 136 // another function (so we can assume that it was possibly called there). 137 Escaped = 0x1, /* 0001 */ 138 // Parameter was definitely called once at this point. 139 DefinitelyCalled = 0x3, /* 0011 */ 140 // Kinds less or equal to NON_ERROR_STATUS are not considered errors. 141 NON_ERROR_STATUS = DefinitelyCalled, 142 // Parameter was not yet called. 143 NotCalled = 0x5, /* 0101 */ 144 // Parameter was not called at least on one path leading to this point, 145 // while there is also at least one path that it gets called. 146 MaybeCalled = 0x7, /* 0111 */ 147 // Parameter was not yet analyzed. 148 NotVisited = 0x8, /* 1000 */ 149 // We already reported a violation and stopped tracking calls for this 150 // parameter. 151 Reported = 0x15, /* 1111 */ 152 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) 153 }; 154 155 constexpr ParameterStatus() = default; 156 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { 157 assert(!seenAnyCalls(K) && "Can't initialize status without a call"); 158 } 159 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { 160 assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); 161 } 162 163 const Expr &getCall() const { 164 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); 165 return *Call; 166 } 167 static bool seenAnyCalls(Kind K) { 168 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; 169 } 170 bool seenAnyCalls() const { return seenAnyCalls(getKind()); } 171 172 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } 173 bool isErrorStatus() const { return isErrorStatus(getKind()); } 174 175 Kind getKind() const { return StatusKind; } 176 177 void join(const ParameterStatus &Other) { 178 // If we have a pointer already, let's keep it. 179 // For the purposes of the analysis, it doesn't really matter 180 // which call we report. 181 // 182 // If we don't have a pointer, let's take whatever gets joined. 183 if (!Call) { 184 Call = Other.Call; 185 } 186 // Join kinds. 187 StatusKind |= Other.getKind(); 188 } 189 190 bool operator==(const ParameterStatus &Other) const { 191 // We compare only kinds, pointers on their own is only additional 192 // information. 193 return getKind() == Other.getKind(); 194 } 195 196 private: 197 // It would've been a perfect place to use llvm::PointerIntPair, but 198 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. 199 Kind StatusKind = NotVisited; 200 const Expr *Call = nullptr; 201 }; 202 203 /// State aggregates statuses of all tracked parameters. 204 class State { 205 public: 206 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) 207 : ParamData(Size, K) {} 208 209 /// Return status of a parameter with the given index. 210 /// \{ 211 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } 212 const ParameterStatus &getStatusFor(unsigned Index) const { 213 return ParamData[Index]; 214 } 215 /// \} 216 217 /// Return true if parameter with the given index can be called. 218 bool seenAnyCalls(unsigned Index) const { 219 return getStatusFor(Index).seenAnyCalls(); 220 } 221 /// Return a reference that we consider a call. 222 /// 223 /// Should only be used for parameters that can be called. 224 const Expr &getCallFor(unsigned Index) const { 225 return getStatusFor(Index).getCall(); 226 } 227 /// Return status kind of parameter with the given index. 228 ParameterStatus::Kind getKindFor(unsigned Index) const { 229 return getStatusFor(Index).getKind(); 230 } 231 232 bool isVisited() const { 233 return llvm::all_of(ParamData, [](const ParameterStatus &S) { 234 return S.getKind() != ParameterStatus::NotVisited; 235 }); 236 } 237 238 // Join other state into the current state. 239 void join(const State &Other) { 240 assert(ParamData.size() == Other.ParamData.size() && 241 "Couldn't join statuses with different sizes"); 242 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { 243 std::get<0>(Pair).join(std::get<1>(Pair)); 244 } 245 } 246 247 using iterator = ParamSizedVector<ParameterStatus>::iterator; 248 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; 249 250 iterator begin() { return ParamData.begin(); } 251 iterator end() { return ParamData.end(); } 252 253 const_iterator begin() const { return ParamData.begin(); } 254 const_iterator end() const { return ParamData.end(); } 255 256 bool operator==(const State &Other) const { 257 return ParamData == Other.ParamData; 258 } 259 260 private: 261 ParamSizedVector<ParameterStatus> ParamData; 262 }; 263 264 /// A simple class that finds DeclRefExpr in the given expression. 265 /// 266 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever 267 /// expressions on our way. Only certain expressions considered "no-op" 268 /// for our task are indeed skipped. 269 class DeclRefFinder 270 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { 271 public: 272 /// Find a DeclRefExpr in the given expression. 273 /// 274 /// In its most basic form (ShouldRetrieveFromComparisons == false), 275 /// this function can be simply reduced to the following question: 276 /// 277 /// - If expression E is used as a function argument, could we say 278 /// that DeclRefExpr nested in E is used as an argument? 279 /// 280 /// According to this rule, we can say that parens, casts and dereferencing 281 /// (dereferencing only applied to function pointers, but this is our case) 282 /// can be skipped. 283 /// 284 /// When we should look into comparisons the question changes to: 285 /// 286 /// - If expression E is used as a condition, could we say that 287 /// DeclRefExpr is being checked? 288 /// 289 /// And even though, these are two different questions, they have quite a lot 290 /// in common. Actually, we can say that whatever expression answers 291 /// positively the first question also fits the second question as well. 292 /// 293 /// In addition, we skip binary operators == and !=, and unary opeartor !. 294 static const DeclRefExpr *find(const Expr *E, 295 bool ShouldRetrieveFromComparisons = false) { 296 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); 297 } 298 299 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } 300 301 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { 302 switch (UO->getOpcode()) { 303 case UO_LNot: 304 // We care about logical not only if we care about comparisons. 305 if (!ShouldRetrieveFromComparisons) 306 return nullptr; 307 LLVM_FALLTHROUGH; 308 // Function pointer/references can be dereferenced before a call. 309 // That doesn't make it, however, any different from a regular call. 310 // For this reason, dereference operation is a "no-op". 311 case UO_Deref: 312 return Visit(UO->getSubExpr()); 313 default: 314 return nullptr; 315 } 316 } 317 318 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { 319 if (!ShouldRetrieveFromComparisons) 320 return nullptr; 321 322 switch (BO->getOpcode()) { 323 case BO_EQ: 324 case BO_NE: { 325 const DeclRefExpr *LHS = Visit(BO->getLHS()); 326 return LHS ? LHS : Visit(BO->getRHS()); 327 } 328 default: 329 return nullptr; 330 } 331 } 332 333 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { 334 return Visit(OVE->getSourceExpr()); 335 } 336 337 const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { 338 if (!ShouldRetrieveFromComparisons) 339 return nullptr; 340 341 // We want to see through some of the boolean builtin functions 342 // that we are likely to see in conditions. 343 switch (CE->getBuiltinCallee()) { 344 case Builtin::BI__builtin_expect: 345 case Builtin::BI__builtin_expect_with_probability: { 346 assert(CE->getNumArgs() >= 2); 347 348 const DeclRefExpr *Candidate = Visit(CE->getArg(0)); 349 return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); 350 } 351 352 case Builtin::BI__builtin_unpredictable: 353 return Visit(CE->getArg(0)); 354 355 default: 356 return nullptr; 357 } 358 } 359 360 const DeclRefExpr *VisitExpr(const Expr *E) { 361 // It is a fallback method that gets called whenever the actual type 362 // of the given expression is not covered. 363 // 364 // We first check if we have anything to skip. And then repeat the whole 365 // procedure for a nested expression instead. 366 const Expr *DeclutteredExpr = E->IgnoreParenCasts(); 367 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; 368 } 369 370 private: 371 DeclRefFinder(bool ShouldRetrieveFromComparisons) 372 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} 373 374 bool ShouldRetrieveFromComparisons; 375 }; 376 377 const DeclRefExpr *findDeclRefExpr(const Expr *In, 378 bool ShouldRetrieveFromComparisons = false) { 379 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); 380 } 381 382 const ParmVarDecl * 383 findReferencedParmVarDecl(const Expr *In, 384 bool ShouldRetrieveFromComparisons = false) { 385 if (const DeclRefExpr *DR = 386 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { 387 return dyn_cast<ParmVarDecl>(DR->getDecl()); 388 } 389 390 return nullptr; 391 } 392 393 /// Return conditions expression of a statement if it has one. 394 const Expr *getCondition(const Stmt *S) { 395 if (!S) { 396 return nullptr; 397 } 398 399 if (const auto *If = dyn_cast<IfStmt>(S)) { 400 return If->getCond(); 401 } 402 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { 403 return Ternary->getCond(); 404 } 405 406 return nullptr; 407 } 408 409 /// A small helper class that collects all named identifiers in the given 410 /// expression. It traverses it recursively, so names from deeper levels 411 /// of the AST will end up in the results. 412 /// Results might have duplicate names, if this is a problem, convert to 413 /// string sets afterwards. 414 class NamesCollector : public RecursiveASTVisitor<NamesCollector> { 415 public: 416 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; 417 using NameCollection = 418 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; 419 420 static NameCollection collect(const Expr *From) { 421 NamesCollector Impl; 422 Impl.TraverseStmt(const_cast<Expr *>(From)); 423 return Impl.Result; 424 } 425 426 bool VisitDeclRefExpr(const DeclRefExpr *E) { 427 Result.push_back(E->getDecl()->getName()); 428 return true; 429 } 430 431 bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { 432 llvm::StringRef Name; 433 434 if (E->isImplicitProperty()) { 435 ObjCMethodDecl *PropertyMethodDecl = nullptr; 436 if (E->isMessagingGetter()) { 437 PropertyMethodDecl = E->getImplicitPropertyGetter(); 438 } else { 439 PropertyMethodDecl = E->getImplicitPropertySetter(); 440 } 441 assert(PropertyMethodDecl && 442 "Implicit property must have associated declaration"); 443 Name = PropertyMethodDecl->getSelector().getNameForSlot(0); 444 } else { 445 assert(E->isExplicitProperty()); 446 Name = E->getExplicitProperty()->getName(); 447 } 448 449 Result.push_back(Name); 450 return true; 451 } 452 453 private: 454 NamesCollector() = default; 455 NameCollection Result; 456 }; 457 458 /// Check whether the given expression mentions any of conventional names. 459 bool mentionsAnyOfConventionalNames(const Expr *E) { 460 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); 461 462 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { 463 return llvm::any_of( 464 CONVENTIONAL_CONDITIONS, 465 [ConditionName](const llvm::StringLiteral &Conventional) { 466 return ConditionName.contains_lower(Conventional); 467 }); 468 }); 469 } 470 471 /// Clarification is a simple pair of a reason why parameter is not called 472 /// on every path and a statement to blame. 473 struct Clarification { 474 NeverCalledReason Reason; 475 const Stmt *Location; 476 }; 477 478 /// A helper class that can produce a clarification based on the given pair 479 /// of basic blocks. 480 class NotCalledClarifier 481 : public ConstStmtVisitor<NotCalledClarifier, 482 llvm::Optional<Clarification>> { 483 public: 484 /// The main entrypoint for the class, the function that tries to find the 485 /// clarification of how to explain which sub-path starts with a CFG edge 486 /// from Conditional to SuccWithoutCall. 487 /// 488 /// This means that this function has one precondition: 489 /// SuccWithoutCall should be a successor block for Conditional. 490 /// 491 /// Because clarification is not needed for non-trivial pairs of blocks 492 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful 493 /// results only for such cases. For this very reason, the parent basic 494 /// block, Conditional, is named that way, so it is clear what kind of 495 /// block is expected. 496 static llvm::Optional<Clarification> 497 clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) { 498 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { 499 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); 500 } 501 return llvm::None; 502 } 503 504 llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) { 505 return VisitBranchingBlock(If, NeverCalledReason::IfThen); 506 } 507 508 llvm::Optional<Clarification> 509 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { 510 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); 511 } 512 513 llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { 514 const Stmt *CaseToBlame = SuccInQuestion->getLabel(); 515 if (!CaseToBlame) { 516 // If interesting basic block is not labeled, it means that this 517 // basic block does not represent any of the cases. 518 return Clarification{NeverCalledReason::SwitchSkipped, Switch}; 519 } 520 521 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; 522 Case = Case->getNextSwitchCase()) { 523 if (Case == CaseToBlame) { 524 return Clarification{NeverCalledReason::Switch, Case}; 525 } 526 } 527 528 llvm_unreachable("Found unexpected switch structure"); 529 } 530 531 llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) { 532 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); 533 } 534 535 llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) { 536 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); 537 } 538 539 llvm::Optional<Clarification> 540 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { 541 assert(Parent->succ_size() == 2 && 542 "Branching block should have exactly two successors"); 543 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); 544 NeverCalledReason ActualReason = 545 updateForSuccessor(DefaultReason, SuccessorIndex); 546 return Clarification{ActualReason, Terminator}; 547 } 548 549 llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { 550 // We don't want to report on short-curcuit logical operations. 551 return llvm::None; 552 } 553 554 llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) { 555 // If we got here, we didn't have a visit function for more derived 556 // classes of statement that this terminator actually belongs to. 557 // 558 // This is not a good scenario and should not happen in practice, but 559 // at least we'll warn the user. 560 return Clarification{NeverCalledReason::FallbackReason, Terminator}; 561 } 562 563 static unsigned getSuccessorIndex(const CFGBlock *Parent, 564 const CFGBlock *Child) { 565 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); 566 assert(It != Parent->succ_end() && 567 "Given blocks should be in parent-child relationship"); 568 return It - Parent->succ_begin(); 569 } 570 571 static NeverCalledReason 572 updateForSuccessor(NeverCalledReason ReasonForTrueBranch, 573 unsigned SuccessorIndex) { 574 assert(SuccessorIndex <= 1); 575 unsigned RawReason = 576 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; 577 assert(RawReason <= 578 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); 579 return static_cast<NeverCalledReason>(RawReason); 580 } 581 582 private: 583 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) 584 : Parent(Parent), SuccInQuestion(SuccInQuestion) {} 585 586 const CFGBlock *Parent, *SuccInQuestion; 587 }; 588 589 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { 590 public: 591 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 592 bool CheckConventionalParameters) { 593 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); 594 } 595 596 private: 597 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 598 bool CheckConventionalParameters) 599 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), 600 CheckConventionalParameters(CheckConventionalParameters), 601 CurrentState(0) { 602 initDataStructures(); 603 assert((size() == 0 || !States.empty()) && 604 "Data structures are inconsistent"); 605 } 606 607 //===----------------------------------------------------------------------===// 608 // Initializing functions 609 //===----------------------------------------------------------------------===// 610 611 void initDataStructures() { 612 const Decl *AnalyzedDecl = AC.getDecl(); 613 614 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { 615 findParamsToTrack(Function); 616 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { 617 findParamsToTrack(Method); 618 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { 619 findCapturesToTrack(Block); 620 findParamsToTrack(Block); 621 } 622 623 // Have something to track, let's init states for every block from the CFG. 624 if (size() != 0) { 625 States = 626 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); 627 } 628 } 629 630 void findCapturesToTrack(const BlockDecl *Block) { 631 for (const auto &Capture : Block->captures()) { 632 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 633 // Parameter DeclContext is its owning function or method. 634 const DeclContext *ParamContext = P->getDeclContext(); 635 if (shouldBeCalledOnce(ParamContext, P)) { 636 TrackedParams.push_back(P); 637 } 638 } 639 } 640 } 641 642 template <class FunctionLikeDecl> 643 void findParamsToTrack(const FunctionLikeDecl *Function) { 644 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { 645 if (shouldBeCalledOnce(Function, Index)) { 646 TrackedParams.push_back(Function->getParamDecl(Index)); 647 } 648 } 649 } 650 651 //===----------------------------------------------------------------------===// 652 // Main logic 'check' functions 653 //===----------------------------------------------------------------------===// 654 655 void check() { 656 // Nothing to check here: we don't have marked parameters. 657 if (size() == 0 || isPossiblyEmptyImpl()) 658 return; 659 660 assert( 661 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && 662 "None of the blocks should be 'visited' before the analysis"); 663 664 // For our task, both backward and forward approaches suite well. 665 // However, in order to report better diagnostics, we decided to go with 666 // backward analysis. 667 // 668 // Let's consider the following CFG and how forward and backward analyses 669 // will work for it. 670 // 671 // FORWARD: | BACKWARD: 672 // #1 | #1 673 // +---------+ | +-----------+ 674 // | if | | |MaybeCalled| 675 // +---------+ | +-----------+ 676 // |NotCalled| | | if | 677 // +---------+ | +-----------+ 678 // / \ | / \ 679 // #2 / \ #3 | #2 / \ #3 680 // +----------------+ +---------+ | +----------------+ +---------+ 681 // | foo() | | ... | | |DefinitelyCalled| |NotCalled| 682 // +----------------+ +---------+ | +----------------+ +---------+ 683 // |DefinitelyCalled| |NotCalled| | | foo() | | ... | 684 // +----------------+ +---------+ | +----------------+ +---------+ 685 // \ / | \ / 686 // \ #4 / | \ #4 / 687 // +-----------+ | +---------+ 688 // | ... | | |NotCalled| 689 // +-----------+ | +---------+ 690 // |MaybeCalled| | | ... | 691 // +-----------+ | +---------+ 692 // 693 // The most natural way to report lacking call in the block #3 would be to 694 // message that the false branch of the if statement in the block #1 doesn't 695 // have a call. And while with the forward approach we'll need to find a 696 // least common ancestor or something like that to find the 'if' to blame, 697 // backward analysis gives it to us out of the box. 698 BackwardDataflowWorklist Worklist(FunctionCFG, AC); 699 700 // Let's visit EXIT. 701 const CFGBlock *Exit = &FunctionCFG.getExit(); 702 assignState(Exit, State(size(), ParameterStatus::NotCalled)); 703 Worklist.enqueuePredecessors(Exit); 704 705 while (const CFGBlock *BB = Worklist.dequeue()) { 706 assert(BB && "Worklist should filter out null blocks"); 707 check(BB); 708 assert(CurrentState.isVisited() && 709 "After the check, basic block should be visited"); 710 711 // Traverse successor basic blocks if the status of this block 712 // has changed. 713 if (assignState(BB, CurrentState)) { 714 Worklist.enqueuePredecessors(BB); 715 } 716 } 717 718 // Check that we have all tracked parameters at the last block. 719 // As we are performing a backward version of the analysis, 720 // it should be the ENTRY block. 721 checkEntry(&FunctionCFG.getEntry()); 722 } 723 724 void check(const CFGBlock *BB) { 725 // We start with a state 'inherited' from all the successors. 726 CurrentState = joinSuccessors(BB); 727 assert(CurrentState.isVisited() && 728 "Shouldn't start with a 'not visited' state"); 729 730 // This is the 'exit' situation, broken promises are probably OK 731 // in such scenarios. 732 if (BB->hasNoReturnElement()) { 733 markNoReturn(); 734 // This block still can have calls (even multiple calls) and 735 // for this reason there is no early return here. 736 } 737 738 // We use a backward dataflow propagation and for this reason we 739 // should traverse basic blocks bottom-up. 740 for (const CFGElement &Element : llvm::reverse(*BB)) { 741 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 742 check(S->getStmt()); 743 } 744 } 745 } 746 void check(const Stmt *S) { Visit(S); } 747 748 void checkEntry(const CFGBlock *Entry) { 749 // We finalize this algorithm with the ENTRY block because 750 // we use a backward version of the analysis. This is where 751 // we can judge that some of the tracked parameters are not called on 752 // every path from ENTRY to EXIT. 753 754 const State &EntryStatus = getState(Entry); 755 llvm::BitVector NotCalledOnEveryPath(size(), false); 756 llvm::BitVector NotUsedOnEveryPath(size(), false); 757 758 // Check if there are no calls of the marked parameter at all 759 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { 760 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 761 762 switch (IndexedStatus.value().getKind()) { 763 case ParameterStatus::NotCalled: 764 // If there were places where this parameter escapes (aka being used), 765 // we can provide a more useful diagnostic by pointing at the exact 766 // branches where it is not even mentioned. 767 if (!hasEverEscaped(IndexedStatus.index())) { 768 // This parameter is was not used at all, so we should report the 769 // most generic version of the warning. 770 if (isCaptured(Parameter)) { 771 // We want to specify that it was captured by the block. 772 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), 773 !isExplicitlyMarked(Parameter)); 774 } else { 775 Handler.handleNeverCalled(Parameter, 776 !isExplicitlyMarked(Parameter)); 777 } 778 } else { 779 // Mark it as 'interesting' to figure out which paths don't even 780 // have escapes. 781 NotUsedOnEveryPath[IndexedStatus.index()] = true; 782 } 783 784 break; 785 case ParameterStatus::MaybeCalled: 786 // If we have 'maybe called' at this point, we have an error 787 // that there is at least one path where this parameter 788 // is not called. 789 // 790 // However, reporting the warning with only that information can be 791 // too vague for the users. For this reason, we mark such parameters 792 // as "interesting" for further analysis. 793 NotCalledOnEveryPath[IndexedStatus.index()] = true; 794 break; 795 default: 796 break; 797 } 798 } 799 800 // Early exit if we don't have parameters for extra analysis. 801 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none()) 802 return; 803 804 // We are looking for a pair of blocks A, B so that the following is true: 805 // * A is a predecessor of B 806 // * B is marked as NotCalled 807 // * A has at least one successor marked as either 808 // Escaped or DefinitelyCalled 809 // 810 // In that situation, it is guaranteed that B is the first block of the path 811 // where the user doesn't call or use parameter in question. 812 // 813 // For this reason, branch A -> B can be used for reporting. 814 // 815 // This part of the algorithm is guarded by a condition that the function 816 // does indeed have a violation of contract. For this reason, we can 817 // spend more time to find a good spot to place the warning. 818 // 819 // The following algorithm has the worst case complexity of O(V + E), 820 // where V is the number of basic blocks in FunctionCFG, 821 // E is the number of edges between blocks in FunctionCFG. 822 for (const CFGBlock *BB : FunctionCFG) { 823 if (!BB) 824 continue; 825 826 const State &BlockState = getState(BB); 827 828 for (unsigned Index : llvm::seq(0u, size())) { 829 // We don't want to use 'isLosingCall' here because we want to report 830 // the following situation as well: 831 // 832 // MaybeCalled 833 // | ... | 834 // MaybeCalled NotCalled 835 // 836 // Even though successor is not 'DefinitelyCalled', it is still useful 837 // to report it, it is still a path without a call. 838 if (NotCalledOnEveryPath[Index] && 839 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { 840 841 findAndReportNotCalledBranches(BB, Index); 842 } else if (NotUsedOnEveryPath[Index] && 843 isLosingEscape(BlockState, BB, Index)) { 844 845 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); 846 } 847 } 848 } 849 } 850 851 /// Check potential call of a tracked parameter. 852 void checkDirectCall(const CallExpr *Call) { 853 if (auto Index = getIndexOfCallee(Call)) { 854 processCallFor(*Index, Call); 855 } 856 } 857 858 /// Check the call expression for being an indirect call of one of the tracked 859 /// parameters. It is indirect in the sense that this particular call is not 860 /// calling the parameter itself, but rather uses it as the argument. 861 template <class CallLikeExpr> 862 void checkIndirectCall(const CallLikeExpr *CallOrMessage) { 863 // CallExpr::arguments does not interact nicely with llvm::enumerate. 864 llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef( 865 CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 866 867 // Let's check if any of the call arguments is a point of interest. 868 for (const auto &Argument : llvm::enumerate(Arguments)) { 869 if (auto Index = getIndexOfExpression(Argument.value())) { 870 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 871 872 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { 873 // If the corresponding parameter is marked as 'called_once' we should 874 // consider it as a call. 875 processCallFor(*Index, CallOrMessage); 876 } else if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { 877 // Otherwise, we mark this parameter as escaped, which can be 878 // interpreted both as called or not called depending on the context. 879 CurrentParamStatus = ParameterStatus::Escaped; 880 } 881 // Otherwise, let's keep the state as it is. 882 } 883 } 884 } 885 886 /// Process call of the parameter with the given index 887 void processCallFor(unsigned Index, const Expr *Call) { 888 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 889 890 if (CurrentParamStatus.seenAnyCalls()) { 891 892 // At this point, this parameter was called, so this is a second call. 893 const ParmVarDecl *Parameter = getParameter(Index); 894 Handler.handleDoubleCall( 895 Parameter, &CurrentState.getCallFor(Index), Call, 896 !isExplicitlyMarked(Parameter), 897 // We are sure that the second call is definitely 898 // going to happen if the status is 'DefinitelyCalled'. 899 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); 900 901 // Mark this parameter as already reported on, so we don't repeat 902 // warnings. 903 CurrentParamStatus = ParameterStatus::Reported; 904 905 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { 906 // If we didn't report anything yet, let's mark this parameter 907 // as called. 908 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); 909 CurrentParamStatus = Called; 910 } 911 } 912 913 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, 914 bool IsEscape = false) { 915 for (const CFGBlock *Succ : Parent->succs()) { 916 if (!Succ) 917 continue; 918 919 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { 920 assert(Parent->succ_size() >= 2 && 921 "Block should have at least two successors at this point"); 922 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { 923 const ParmVarDecl *Parameter = getParameter(Index); 924 Handler.handleNeverCalled(Parameter, Clarification->Location, 925 Clarification->Reason, !IsEscape, 926 !isExplicitlyMarked(Parameter)); 927 } 928 } 929 } 930 } 931 932 //===----------------------------------------------------------------------===// 933 // Predicate functions to check parameters 934 //===----------------------------------------------------------------------===// 935 936 /// Return true if parameter is explicitly marked as 'called_once'. 937 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { 938 return Parameter->hasAttr<CalledOnceAttr>(); 939 } 940 941 /// Return true if the given name matches conventional pattens. 942 static bool isConventional(llvm::StringRef Name) { 943 return llvm::count(CONVENTIONAL_NAMES, Name) != 0; 944 } 945 946 /// Return true if the given name has conventional suffixes. 947 static bool hasConventionalSuffix(llvm::StringRef Name) { 948 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { 949 return Name.endswith(Suffix); 950 }); 951 } 952 953 /// Return true if the given type can be used for conventional parameters. 954 static bool isConventional(QualType Ty) { 955 if (!Ty->isBlockPointerType()) { 956 return false; 957 } 958 959 QualType BlockType = Ty->getAs<BlockPointerType>()->getPointeeType(); 960 // Completion handlers should have a block type with void return type. 961 return BlockType->getAs<FunctionType>()->getReturnType()->isVoidType(); 962 } 963 964 /// Return true if the only parameter of the function is conventional. 965 static bool isOnlyParameterConventional(const FunctionDecl *Function) { 966 IdentifierInfo *II = Function->getIdentifier(); 967 return Function->getNumParams() == 1 && II && 968 hasConventionalSuffix(II->getName()); 969 } 970 971 /// Return true/false if 'swift_async' attribute states that the given 972 /// parameter is conventionally called once. 973 /// Return llvm::None if the given declaration doesn't have 'swift_async' 974 /// attribute. 975 static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D, 976 unsigned ParamIndex) { 977 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { 978 if (A->getKind() == SwiftAsyncAttr::None) { 979 return false; 980 } 981 982 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; 983 } 984 return llvm::None; 985 } 986 987 /// Return true if the specified selector piece matches conventions. 988 static bool isConventionalSelectorPiece(Selector MethodSelector, 989 unsigned PieceIndex, 990 QualType PieceType) { 991 if (!isConventional(PieceType)) { 992 return false; 993 } 994 995 if (MethodSelector.getNumArgs() == 1) { 996 assert(PieceIndex == 0); 997 return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); 998 } 999 1000 llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); 1001 return isConventional(PieceName) || hasConventionalSuffix(PieceName); 1002 } 1003 1004 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { 1005 return isExplicitlyMarked(Parameter) || 1006 (CheckConventionalParameters && 1007 (isConventional(Parameter->getName()) || 1008 hasConventionalSuffix(Parameter->getName())) && 1009 isConventional(Parameter->getType())); 1010 } 1011 1012 bool shouldBeCalledOnce(const DeclContext *ParamContext, 1013 const ParmVarDecl *Param) { 1014 unsigned ParamIndex = Param->getFunctionScopeIndex(); 1015 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { 1016 return shouldBeCalledOnce(Function, ParamIndex); 1017 } 1018 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { 1019 return shouldBeCalledOnce(Method, ParamIndex); 1020 } 1021 return shouldBeCalledOnce(Param); 1022 } 1023 1024 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { 1025 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); 1026 } 1027 1028 bool shouldBeCalledOnce(const FunctionDecl *Function, 1029 unsigned ParamIndex) const { 1030 if (ParamIndex >= Function->getNumParams()) { 1031 return false; 1032 } 1033 // 'swift_async' goes first and overrides anything else. 1034 if (auto ConventionalAsync = 1035 isConventionalSwiftAsync(Function, ParamIndex)) { 1036 return ConventionalAsync.getValue(); 1037 } 1038 1039 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || 1040 (CheckConventionalParameters && 1041 isOnlyParameterConventional(Function)); 1042 } 1043 1044 bool shouldBeCalledOnce(const ObjCMethodDecl *Method, 1045 unsigned ParamIndex) const { 1046 Selector MethodSelector = Method->getSelector(); 1047 if (ParamIndex >= MethodSelector.getNumArgs()) { 1048 return false; 1049 } 1050 1051 // 'swift_async' goes first and overrides anything else. 1052 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { 1053 return ConventionalAsync.getValue(); 1054 } 1055 1056 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); 1057 return shouldBeCalledOnce(Parameter) || 1058 (CheckConventionalParameters && 1059 isConventionalSelectorPiece(MethodSelector, ParamIndex, 1060 Parameter->getType())); 1061 } 1062 1063 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { 1064 const FunctionDecl *Function = Call->getDirectCallee(); 1065 return Function && shouldBeCalledOnce(Function, ParamIndex); 1066 } 1067 1068 bool shouldBeCalledOnce(const ObjCMessageExpr *Message, 1069 unsigned ParamIndex) const { 1070 const ObjCMethodDecl *Method = Message->getMethodDecl(); 1071 return Method && ParamIndex < Method->param_size() && 1072 shouldBeCalledOnce(Method, ParamIndex); 1073 } 1074 1075 //===----------------------------------------------------------------------===// 1076 // Utility methods 1077 //===----------------------------------------------------------------------===// 1078 1079 bool isCaptured(const ParmVarDecl *Parameter) const { 1080 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { 1081 return Block->capturesVariable(Parameter); 1082 } 1083 return false; 1084 } 1085 1086 /// Return true if the analyzed function is actually a default implementation 1087 /// of the method that has to be overriden. 1088 /// 1089 /// These functions can have tracked parameters, but wouldn't call them 1090 /// because they are not designed to perform any meaningful actions. 1091 /// 1092 /// There are a couple of flavors of such default implementations: 1093 /// 1. Empty methods or methods with a single return statement 1094 /// 2. Methods that have one block with a call to no return function 1095 /// 3. Methods with only assertion-like operations 1096 bool isPossiblyEmptyImpl() const { 1097 if (!isa<ObjCMethodDecl>(AC.getDecl())) { 1098 // We care only about functions that are not supposed to be called. 1099 // Only methods can be overriden. 1100 return false; 1101 } 1102 1103 // Case #1 (without return statements) 1104 if (FunctionCFG.size() == 2) { 1105 // Method has only two blocks: ENTRY and EXIT. 1106 // This is equivalent to empty function. 1107 return true; 1108 } 1109 1110 // Case #2 1111 if (FunctionCFG.size() == 3) { 1112 const CFGBlock &Entry = FunctionCFG.getEntry(); 1113 if (Entry.succ_empty()) { 1114 return false; 1115 } 1116 1117 const CFGBlock *OnlyBlock = *Entry.succ_begin(); 1118 // Method has only one block, let's see if it has a no-return 1119 // element. 1120 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { 1121 return true; 1122 } 1123 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. 1124 } 1125 1126 // Cases #1 (return statements) and #3. 1127 // 1128 // It is hard to detect that something is an assertion or came 1129 // from assertion. Here we use a simple heuristic: 1130 // 1131 // - If it came from a macro, it can be an assertion. 1132 // 1133 // Additionally, we can't assume a number of basic blocks or the CFG's 1134 // structure because assertions might include loops and conditions. 1135 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { 1136 if (!BB) { 1137 // Unreachable blocks are totally fine. 1138 return true; 1139 } 1140 1141 // Return statements can have sub-expressions that are represented as 1142 // separate statements of a basic block. We should allow this. 1143 // This parent map will be initialized with a parent tree for all 1144 // subexpressions of the block's return statement (if it has one). 1145 std::unique_ptr<ParentMap> ReturnChildren; 1146 1147 return llvm::all_of( 1148 llvm::reverse(*BB), // we should start with return statements, if we 1149 // have any, i.e. from the bottom of the block 1150 [&ReturnChildren](const CFGElement &Element) { 1151 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 1152 const Stmt *SuspiciousStmt = S->getStmt(); 1153 1154 if (isa<ReturnStmt>(SuspiciousStmt)) { 1155 // Let's initialize this structure to test whether 1156 // some further statement is a part of this return. 1157 ReturnChildren = std::make_unique<ParentMap>( 1158 const_cast<Stmt *>(SuspiciousStmt)); 1159 // Return statements are allowed as part of #1. 1160 return true; 1161 } 1162 1163 return SuspiciousStmt->getBeginLoc().isMacroID() || 1164 (ReturnChildren && 1165 ReturnChildren->hasParent(SuspiciousStmt)); 1166 } 1167 return true; 1168 }); 1169 }); 1170 } 1171 1172 /// Check if parameter with the given index has ever escaped. 1173 bool hasEverEscaped(unsigned Index) const { 1174 return llvm::any_of(States, [Index](const State &StateForOneBB) { 1175 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; 1176 }); 1177 } 1178 1179 /// Return status stored for the given basic block. 1180 /// \{ 1181 State &getState(const CFGBlock *BB) { 1182 assert(BB); 1183 return States[BB->getBlockID()]; 1184 } 1185 const State &getState(const CFGBlock *BB) const { 1186 assert(BB); 1187 return States[BB->getBlockID()]; 1188 } 1189 /// \} 1190 1191 /// Assign status to the given basic block. 1192 /// 1193 /// Returns true when the stored status changed. 1194 bool assignState(const CFGBlock *BB, const State &ToAssign) { 1195 State &Current = getState(BB); 1196 if (Current == ToAssign) { 1197 return false; 1198 } 1199 1200 Current = ToAssign; 1201 return true; 1202 } 1203 1204 /// Join all incoming statuses for the given basic block. 1205 State joinSuccessors(const CFGBlock *BB) const { 1206 auto Succs = 1207 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { 1208 return Succ && this->getState(Succ).isVisited(); 1209 }); 1210 // We came to this block from somewhere after all. 1211 assert(!Succs.empty() && 1212 "Basic block should have at least one visited successor"); 1213 1214 State Result = getState(*Succs.begin()); 1215 1216 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { 1217 Result.join(getState(Succ)); 1218 } 1219 1220 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { 1221 handleConditional(BB, Condition, Result); 1222 } 1223 1224 return Result; 1225 } 1226 1227 void handleConditional(const CFGBlock *BB, const Expr *Condition, 1228 State &ToAlter) const { 1229 handleParameterCheck(BB, Condition, ToAlter); 1230 if (SuppressOnConventionalErrorPaths) { 1231 handleConventionalCheck(BB, Condition, ToAlter); 1232 } 1233 } 1234 1235 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, 1236 State &ToAlter) const { 1237 // In this function, we try to deal with the following pattern: 1238 // 1239 // if (parameter) 1240 // parameter(...); 1241 // 1242 // It's not good to show a warning here because clearly 'parameter' 1243 // couldn't and shouldn't be called on the 'else' path. 1244 // 1245 // Let's check if this if statement has a check involving one of 1246 // the tracked parameters. 1247 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( 1248 Condition, 1249 /* ShouldRetrieveFromComparisons = */ true)) { 1250 if (const auto Index = getIndex(*Parameter)) { 1251 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); 1252 1253 // We don't want to deep dive into semantics of the check and 1254 // figure out if that check was for null or something else. 1255 // We simply trust the user that they know what they are doing. 1256 // 1257 // For this reason, in the following loop we look for the 1258 // best-looking option. 1259 for (const CFGBlock *Succ : BB->succs()) { 1260 if (!Succ) 1261 continue; 1262 1263 const ParameterStatus &StatusInSucc = 1264 getState(Succ).getStatusFor(*Index); 1265 1266 if (StatusInSucc.isErrorStatus()) { 1267 continue; 1268 } 1269 1270 // Let's use this status instead. 1271 CurrentStatus = StatusInSucc; 1272 1273 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { 1274 // This is the best option to have and we already found it. 1275 break; 1276 } 1277 1278 // If we found 'Escaped' first, we still might find 'DefinitelyCalled' 1279 // on the other branch. And we prefer the latter. 1280 } 1281 } 1282 } 1283 } 1284 1285 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, 1286 State &ToAlter) const { 1287 // Even when the analysis is technically correct, it is a widespread pattern 1288 // not to call completion handlers in some scenarios. These usually have 1289 // typical conditional names, such as 'error' or 'cancel'. 1290 if (!mentionsAnyOfConventionalNames(Condition)) { 1291 return; 1292 } 1293 1294 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { 1295 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 1296 // Conventions do not apply to explicitly marked parameters. 1297 if (isExplicitlyMarked(Parameter)) { 1298 continue; 1299 } 1300 1301 ParameterStatus &CurrentStatus = IndexedStatus.value(); 1302 // If we did find that on one of the branches the user uses the callback 1303 // and doesn't on the other path, we believe that they know what they are 1304 // doing and trust them. 1305 // 1306 // There are two possible scenarios for that: 1307 // 1. Current status is 'MaybeCalled' and one of the branches is 1308 // 'DefinitelyCalled' 1309 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' 1310 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || 1311 isLosingEscape(ToAlter, BB, IndexedStatus.index())) { 1312 CurrentStatus = ParameterStatus::Escaped; 1313 } 1314 } 1315 } 1316 1317 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1318 unsigned ParameterIndex) const { 1319 // Let's check if the block represents DefinitelyCalled -> MaybeCalled 1320 // transition. 1321 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1322 ParameterStatus::MaybeCalled, 1323 ParameterStatus::DefinitelyCalled); 1324 } 1325 1326 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1327 unsigned ParameterIndex) const { 1328 // Let's check if the block represents Escaped -> NotCalled transition. 1329 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1330 ParameterStatus::NotCalled, ParameterStatus::Escaped); 1331 } 1332 1333 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1334 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, 1335 ParameterStatus::Kind BeforeJoin) const { 1336 assert(!ParameterStatus::isErrorStatus(BeforeJoin) && 1337 ParameterStatus::isErrorStatus(AfterJoin) && 1338 "It's not a losing join if statuses do not represent " 1339 "correct-to-error transition"); 1340 1341 const ParameterStatus &CurrentStatus = 1342 StateAfterJoin.getStatusFor(ParameterIndex); 1343 1344 return CurrentStatus.getKind() == AfterJoin && 1345 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); 1346 } 1347 1348 /// Return true if any of the successors of the given basic block has 1349 /// a specified status for the given parameter. 1350 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, 1351 ParameterStatus::Kind ToFind) const { 1352 return llvm::any_of( 1353 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { 1354 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; 1355 }); 1356 } 1357 1358 /// Check given expression that was discovered to escape. 1359 void checkEscapee(const Expr *E) { 1360 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1361 checkEscapee(*Parameter); 1362 } 1363 } 1364 1365 /// Check given parameter that was discovered to escape. 1366 void checkEscapee(const ParmVarDecl &Parameter) { 1367 if (auto Index = getIndex(Parameter)) { 1368 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1369 1370 if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { 1371 CurrentParamStatus = ParameterStatus::Escaped; 1372 } 1373 } 1374 } 1375 1376 /// Mark all parameters in the current state as 'no-return'. 1377 void markNoReturn() { 1378 for (ParameterStatus &PS : CurrentState) { 1379 PS = ParameterStatus::NoReturn; 1380 } 1381 } 1382 1383 /// Check if the given assignment represents suppression and act on it. 1384 void checkSuppression(const BinaryOperator *Assignment) { 1385 // Suppression has the following form: 1386 // parameter = 0; 1387 // 0 can be of any form (NULL, nil, etc.) 1388 if (auto Index = getIndexOfExpression(Assignment->getLHS())) { 1389 1390 // We don't care what is written in the RHS, it could be whatever 1391 // we can interpret as 0. 1392 if (auto Constant = 1393 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( 1394 AC.getASTContext())) { 1395 1396 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1397 1398 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { 1399 // Even though this suppression mechanism is introduced to tackle 1400 // false positives for multiple calls, the fact that the user has 1401 // to use suppression can also tell us that we couldn't figure out 1402 // how different paths cancel each other out. And if that is true, 1403 // we will most certainly have false positives about parameters not 1404 // being called on certain paths. 1405 // 1406 // For this reason, we abandon tracking this parameter altogether. 1407 CurrentParamStatus = ParameterStatus::Reported; 1408 } 1409 } 1410 } 1411 } 1412 1413 public: 1414 //===----------------------------------------------------------------------===// 1415 // Tree traversal methods 1416 //===----------------------------------------------------------------------===// 1417 1418 void VisitCallExpr(const CallExpr *Call) { 1419 // This call might be a direct call, i.e. a parameter call... 1420 checkDirectCall(Call); 1421 // ... or an indirect call, i.e. when parameter is an argument. 1422 checkIndirectCall(Call); 1423 } 1424 1425 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { 1426 // The most common situation that we are defending against here is 1427 // copying a tracked parameter. 1428 if (const Expr *Receiver = Message->getInstanceReceiver()) { 1429 checkEscapee(Receiver); 1430 } 1431 // Message expressions unlike calls, could not be direct. 1432 checkIndirectCall(Message); 1433 } 1434 1435 void VisitBlockExpr(const BlockExpr *Block) { 1436 for (const auto &Capture : Block->getBlockDecl()->captures()) { 1437 // If a block captures a tracked parameter, it should be 1438 // considered escaped. 1439 // On one hand, blocks that do that should definitely call it on 1440 // every path. However, it is not guaranteed that the block 1441 // itself gets called whenever it gets created. 1442 // 1443 // Because we don't want to track blocks and whether they get called, 1444 // we consider such parameters simply escaped. 1445 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 1446 checkEscapee(*Param); 1447 } 1448 } 1449 } 1450 1451 void VisitBinaryOperator(const BinaryOperator *Op) { 1452 if (Op->getOpcode() == clang::BO_Assign) { 1453 // Let's check if one of the tracked parameters is assigned into 1454 // something, and if it is we don't want to track extra variables, so we 1455 // consider it as an escapee. 1456 checkEscapee(Op->getRHS()); 1457 1458 // Let's check whether this assignment is a suppression. 1459 checkSuppression(Op); 1460 } 1461 } 1462 1463 void VisitDeclStmt(const DeclStmt *DS) { 1464 // Variable initialization is not assignment and should be handled 1465 // separately. 1466 // 1467 // Multiple declarations can be a part of declaration statement. 1468 for (const auto *Declaration : DS->getDeclGroup()) { 1469 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { 1470 if (Var->getInit()) { 1471 checkEscapee(Var->getInit()); 1472 } 1473 } 1474 } 1475 } 1476 1477 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { 1478 // We consider '(void)parameter' as a manual no-op escape. 1479 // It should be used to explicitly tell the analysis that this parameter 1480 // is intentionally not called on this path. 1481 if (Cast->getType().getCanonicalType()->isVoidType()) { 1482 checkEscapee(Cast->getSubExpr()); 1483 } 1484 } 1485 1486 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { 1487 // It is OK not to call marked parameters on exceptional paths. 1488 markNoReturn(); 1489 } 1490 1491 private: 1492 unsigned size() const { return TrackedParams.size(); } 1493 1494 llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { 1495 return getIndexOfExpression(Call->getCallee()); 1496 } 1497 1498 llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const { 1499 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1500 return getIndex(*Parameter); 1501 } 1502 1503 return llvm::None; 1504 } 1505 1506 llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { 1507 // Expected number of parameters that we actually track is 1. 1508 // 1509 // Also, the maximum number of declared parameters could not be on a scale 1510 // of hundreds of thousands. 1511 // 1512 // In this setting, linear search seems reasonable and even performs better 1513 // than bisection. 1514 ParamSizedVector<const ParmVarDecl *>::const_iterator It = 1515 llvm::find(TrackedParams, &Parameter); 1516 1517 if (It != TrackedParams.end()) { 1518 return It - TrackedParams.begin(); 1519 } 1520 1521 return llvm::None; 1522 } 1523 1524 const ParmVarDecl *getParameter(unsigned Index) const { 1525 assert(Index < TrackedParams.size()); 1526 return TrackedParams[Index]; 1527 } 1528 1529 const CFG &FunctionCFG; 1530 AnalysisDeclContext &AC; 1531 CalledOnceCheckHandler &Handler; 1532 bool CheckConventionalParameters; 1533 // As of now, we turn this behavior off. So, we still are going to report 1534 // missing calls on paths that look like it was intentional. 1535 // Technically such reports are true positives, but they can make some users 1536 // grumpy because of the sheer number of warnings. 1537 // It can be turned back on if we decide that we want to have the other way 1538 // around. 1539 bool SuppressOnConventionalErrorPaths = false; 1540 1541 State CurrentState; 1542 ParamSizedVector<const ParmVarDecl *> TrackedParams; 1543 CFGSizedVector<State> States; 1544 }; 1545 1546 } // end anonymous namespace 1547 1548 namespace clang { 1549 void checkCalledOnceParameters(AnalysisDeclContext &AC, 1550 CalledOnceCheckHandler &Handler, 1551 bool CheckConventionalParameters) { 1552 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); 1553 } 1554 } // end namespace clang 1555