1 //===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// This file implements classes for searching and anlyzing source code clones. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Analysis/CloneDetection.h" 15 16 #include "clang/AST/ASTContext.h" 17 #include "clang/AST/RecursiveASTVisitor.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtVisitor.h" 20 #include "clang/Lex/Lexer.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/MD5.h" 23 #include "llvm/Support/raw_ostream.h" 24 25 using namespace clang; 26 27 StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D, 28 unsigned StartIndex, unsigned EndIndex) 29 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) { 30 assert(Stmt && "Stmt must not be a nullptr"); 31 assert(StartIndex < EndIndex && "Given array should not be empty"); 32 assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt"); 33 } 34 35 StmtSequence::StmtSequence(const Stmt *Stmt, const Decl *D) 36 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {} 37 38 StmtSequence::StmtSequence() 39 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {} 40 41 bool StmtSequence::contains(const StmtSequence &Other) const { 42 // If both sequences reside in different declarations, they can never contain 43 // each other. 44 if (D != Other.D) 45 return false; 46 47 const SourceManager &SM = getASTContext().getSourceManager(); 48 49 // Otherwise check if the start and end locations of the current sequence 50 // surround the other sequence. 51 bool StartIsInBounds = 52 SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) || 53 getStartLoc() == Other.getStartLoc(); 54 if (!StartIsInBounds) 55 return false; 56 57 bool EndIsInBounds = 58 SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) || 59 Other.getEndLoc() == getEndLoc(); 60 return EndIsInBounds; 61 } 62 63 StmtSequence::iterator StmtSequence::begin() const { 64 if (!holdsSequence()) { 65 return &S; 66 } 67 auto CS = cast<CompoundStmt>(S); 68 return CS->body_begin() + StartIndex; 69 } 70 71 StmtSequence::iterator StmtSequence::end() const { 72 if (!holdsSequence()) { 73 return reinterpret_cast<StmtSequence::iterator>(&S) + 1; 74 } 75 auto CS = cast<CompoundStmt>(S); 76 return CS->body_begin() + EndIndex; 77 } 78 79 ASTContext &StmtSequence::getASTContext() const { 80 assert(D); 81 return D->getASTContext(); 82 } 83 84 SourceLocation StmtSequence::getStartLoc() const { 85 return front()->getLocStart(); 86 } 87 88 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } 89 90 SourceRange StmtSequence::getSourceRange() const { 91 return SourceRange(getStartLoc(), getEndLoc()); 92 } 93 94 /// Prints the macro name that contains the given SourceLocation into the given 95 /// raw_string_ostream. 96 static void printMacroName(llvm::raw_string_ostream &MacroStack, 97 ASTContext &Context, SourceLocation Loc) { 98 MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), 99 Context.getLangOpts()); 100 101 // Add an empty space at the end as a padding to prevent 102 // that macro names concatenate to the names of other macros. 103 MacroStack << " "; 104 } 105 106 /// Returns a string that represents all macro expansions that expanded into the 107 /// given SourceLocation. 108 /// 109 /// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations 110 /// A and B are expanded from the same macros in the same order. 111 static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) { 112 std::string MacroStack; 113 llvm::raw_string_ostream MacroStackStream(MacroStack); 114 SourceManager &SM = Context.getSourceManager(); 115 116 // Iterate over all macros that expanded into the given SourceLocation. 117 while (Loc.isMacroID()) { 118 // Add the macro name to the stream. 119 printMacroName(MacroStackStream, Context, Loc); 120 Loc = SM.getImmediateMacroCallerLoc(Loc); 121 } 122 MacroStackStream.flush(); 123 return MacroStack; 124 } 125 126 namespace { 127 typedef unsigned DataPiece; 128 129 /// Collects the data of a single Stmt. 130 /// 131 /// This class defines what a code clone is: If it collects for two statements 132 /// the same data, then those two statements are considered to be clones of each 133 /// other. 134 /// 135 /// All collected data is forwarded to the given data consumer of the type T. 136 /// The data consumer class needs to provide a member method with the signature: 137 /// update(StringRef Str) 138 template <typename T> 139 class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> { 140 141 ASTContext &Context; 142 /// The data sink to which all data is forwarded. 143 T &DataConsumer; 144 145 public: 146 /// Collects data of the given Stmt. 147 /// \param S The given statement. 148 /// \param Context The ASTContext of S. 149 /// \param DataConsumer The data sink to which all data is forwarded. 150 StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer) 151 : Context(Context), DataConsumer(DataConsumer) { 152 this->Visit(S); 153 } 154 155 // Below are utility methods for appending different data to the vector. 156 157 void addData(DataPiece Integer) { 158 DataConsumer.update( 159 StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer))); 160 } 161 162 void addData(llvm::StringRef Str) { DataConsumer.update(Str); } 163 164 void addData(const QualType &QT) { addData(QT.getAsString()); } 165 166 // The functions below collect the class specific data of each Stmt subclass. 167 168 // Utility macro for defining a visit method for a given class. This method 169 // calls back to the ConstStmtVisitor to visit all parent classes. 170 #define DEF_ADD_DATA(CLASS, CODE) \ 171 void Visit##CLASS(const CLASS *S) { \ 172 CODE; \ 173 ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \ 174 } 175 176 DEF_ADD_DATA(Stmt, { 177 addData(S->getStmtClass()); 178 // This ensures that macro generated code isn't identical to macro-generated 179 // code. 180 addData(getMacroStack(S->getLocStart(), Context)); 181 addData(getMacroStack(S->getLocEnd(), Context)); 182 }) 183 DEF_ADD_DATA(Expr, { addData(S->getType()); }) 184 185 //--- Builtin functionality ----------------------------------------------// 186 DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) 187 DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) 188 DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) 189 DEF_ADD_DATA(TypeTraitExpr, { 190 addData(S->getTrait()); 191 for (unsigned i = 0; i < S->getNumArgs(); ++i) 192 addData(S->getArg(i)->getType()); 193 }) 194 195 //--- Calls --------------------------------------------------------------// 196 DEF_ADD_DATA(CallExpr, { 197 // Function pointers don't have a callee and we just skip hashing it. 198 if (const FunctionDecl *D = S->getDirectCallee()) { 199 // If the function is a template specialization, we also need to handle 200 // the template arguments as they are not included in the qualified name. 201 if (auto Args = D->getTemplateSpecializationArgs()) { 202 std::string ArgString; 203 204 // Print all template arguments into ArgString 205 llvm::raw_string_ostream OS(ArgString); 206 for (unsigned i = 0; i < Args->size(); ++i) { 207 Args->get(i).print(Context.getLangOpts(), OS); 208 // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'. 209 OS << '\n'; 210 } 211 OS.flush(); 212 213 addData(ArgString); 214 } 215 addData(D->getQualifiedNameAsString()); 216 } 217 }) 218 219 //--- Exceptions ---------------------------------------------------------// 220 DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) 221 222 //--- C++ OOP Stmts ------------------------------------------------------// 223 DEF_ADD_DATA(CXXDeleteExpr, { 224 addData(S->isArrayFormAsWritten()); 225 addData(S->isGlobalDelete()); 226 }) 227 228 //--- Casts --------------------------------------------------------------// 229 DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) 230 231 //--- Miscellaneous Exprs ------------------------------------------------// 232 DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) 233 DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) 234 235 //--- Control flow -------------------------------------------------------// 236 DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) 237 DEF_ADD_DATA(IndirectGotoStmt, { 238 if (S->getConstantTarget()) 239 addData(S->getConstantTarget()->getName()); 240 }) 241 DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) 242 DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) 243 DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) 244 245 //--- Objective-C --------------------------------------------------------// 246 DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) 247 DEF_ADD_DATA(ObjCPropertyRefExpr, { 248 addData(S->isSuperReceiver()); 249 addData(S->isImplicitProperty()); 250 }) 251 DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) 252 253 //--- Miscellaneous Stmts ------------------------------------------------// 254 DEF_ADD_DATA(CXXFoldExpr, { 255 addData(S->isRightFold()); 256 addData(S->getOperator()); 257 }) 258 DEF_ADD_DATA(GenericSelectionExpr, { 259 for (unsigned i = 0; i < S->getNumAssocs(); ++i) { 260 addData(S->getAssocType(i)); 261 } 262 }) 263 DEF_ADD_DATA(LambdaExpr, { 264 for (const LambdaCapture &C : S->captures()) { 265 addData(C.isPackExpansion()); 266 addData(C.getCaptureKind()); 267 if (C.capturesVariable()) 268 addData(C.getCapturedVar()->getType()); 269 } 270 addData(S->isGenericLambda()); 271 addData(S->isMutable()); 272 }) 273 DEF_ADD_DATA(DeclStmt, { 274 auto numDecls = std::distance(S->decl_begin(), S->decl_end()); 275 addData(static_cast<DataPiece>(numDecls)); 276 for (const Decl *D : S->decls()) { 277 if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { 278 addData(VD->getType()); 279 } 280 } 281 }) 282 DEF_ADD_DATA(AsmStmt, { 283 addData(S->isSimple()); 284 addData(S->isVolatile()); 285 addData(S->generateAsmString(Context)); 286 for (unsigned i = 0; i < S->getNumInputs(); ++i) { 287 addData(S->getInputConstraint(i)); 288 } 289 for (unsigned i = 0; i < S->getNumOutputs(); ++i) { 290 addData(S->getOutputConstraint(i)); 291 } 292 for (unsigned i = 0; i < S->getNumClobbers(); ++i) { 293 addData(S->getClobber(i)); 294 } 295 }) 296 DEF_ADD_DATA(AttributedStmt, { 297 for (const Attr *A : S->getAttrs()) { 298 addData(std::string(A->getSpelling())); 299 } 300 }) 301 }; 302 } // end anonymous namespace 303 304 void CloneDetector::analyzeCodeBody(const Decl *D) { 305 assert(D); 306 assert(D->hasBody()); 307 308 Sequences.push_back(StmtSequence(D->getBody(), D)); 309 } 310 311 /// Returns true if and only if \p Stmt contains at least one other 312 /// sequence in the \p Group. 313 static bool containsAnyInGroup(StmtSequence &Seq, 314 CloneDetector::CloneGroup &Group) { 315 for (StmtSequence &GroupSeq : Group) { 316 if (Seq.contains(GroupSeq)) 317 return true; 318 } 319 return false; 320 } 321 322 /// Returns true if and only if all sequences in \p OtherGroup are 323 /// contained by a sequence in \p Group. 324 static bool containsGroup(CloneDetector::CloneGroup &Group, 325 CloneDetector::CloneGroup &OtherGroup) { 326 // We have less sequences in the current group than we have in the other, 327 // so we will never fulfill the requirement for returning true. This is only 328 // possible because we know that a sequence in Group can contain at most 329 // one sequence in OtherGroup. 330 if (Group.size() < OtherGroup.size()) 331 return false; 332 333 for (StmtSequence &Stmt : Group) { 334 if (!containsAnyInGroup(Stmt, OtherGroup)) 335 return false; 336 } 337 return true; 338 } 339 340 void OnlyLargestCloneConstraint::constrain( 341 std::vector<CloneDetector::CloneGroup> &Result) { 342 std::vector<unsigned> IndexesToRemove; 343 344 // Compare every group in the result with the rest. If one groups contains 345 // another group, we only need to return the bigger group. 346 // Note: This doesn't scale well, so if possible avoid calling any heavy 347 // function from this loop to minimize the performance impact. 348 for (unsigned i = 0; i < Result.size(); ++i) { 349 for (unsigned j = 0; j < Result.size(); ++j) { 350 // Don't compare a group with itself. 351 if (i == j) 352 continue; 353 354 if (containsGroup(Result[j], Result[i])) { 355 IndexesToRemove.push_back(i); 356 break; 357 } 358 } 359 } 360 361 // Erasing a list of indexes from the vector should be done with decreasing 362 // indexes. As IndexesToRemove is constructed with increasing values, we just 363 // reverse iterate over it to get the desired order. 364 for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) { 365 Result.erase(Result.begin() + *I); 366 } 367 } 368 369 static size_t createHash(llvm::MD5 &Hash) { 370 size_t HashCode; 371 372 // Create the final hash code for the current Stmt. 373 llvm::MD5::MD5Result HashResult; 374 Hash.final(HashResult); 375 376 // Copy as much as possible of the generated hash code to the Stmt's hash 377 // code. 378 std::memcpy(&HashCode, &HashResult, 379 std::min(sizeof(HashCode), sizeof(HashResult))); 380 381 return HashCode; 382 } 383 384 size_t RecursiveCloneTypeIIConstraint::saveHash( 385 const Stmt *S, const Decl *D, 386 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) { 387 llvm::MD5 Hash; 388 ASTContext &Context = D->getASTContext(); 389 390 StmtDataCollector<llvm::MD5>(S, Context, Hash); 391 392 auto CS = dyn_cast<CompoundStmt>(S); 393 SmallVector<size_t, 8> ChildHashes; 394 395 for (const Stmt *Child : S->children()) { 396 if (Child == nullptr) { 397 ChildHashes.push_back(0); 398 continue; 399 } 400 size_t ChildHash = saveHash(Child, D, StmtsByHash); 401 Hash.update( 402 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash))); 403 ChildHashes.push_back(ChildHash); 404 } 405 406 if (CS) { 407 for (unsigned Length = 2; Length <= CS->size(); ++Length) { 408 for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) { 409 llvm::MD5 Hash; 410 for (unsigned i = Pos; i < Pos + Length; ++i) { 411 size_t ChildHash = ChildHashes[i]; 412 Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash), 413 sizeof(ChildHash))); 414 } 415 StmtsByHash.push_back(std::make_pair( 416 createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length))); 417 } 418 } 419 } 420 421 size_t HashCode = createHash(Hash); 422 StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D))); 423 return HashCode; 424 } 425 426 namespace { 427 /// Wrapper around FoldingSetNodeID that it can be used as the template 428 /// argument of the StmtDataCollector. 429 class FoldingSetNodeIDWrapper { 430 431 llvm::FoldingSetNodeID &FS; 432 433 public: 434 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {} 435 436 void update(StringRef Str) { FS.AddString(Str); } 437 }; 438 } // end anonymous namespace 439 440 /// Writes the relevant data from all statements and child statements 441 /// in the given StmtSequence into the given FoldingSetNodeID. 442 static void CollectStmtSequenceData(const StmtSequence &Sequence, 443 FoldingSetNodeIDWrapper &OutputData) { 444 for (const Stmt *S : Sequence) { 445 StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(), 446 OutputData); 447 448 for (const Stmt *Child : S->children()) { 449 if (!Child) 450 continue; 451 452 CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()), 453 OutputData); 454 } 455 } 456 } 457 458 /// Returns true if both sequences are clones of each other. 459 static bool areSequencesClones(const StmtSequence &LHS, 460 const StmtSequence &RHS) { 461 // We collect the data from all statements in the sequence as we did before 462 // when generating a hash value for each sequence. But this time we don't 463 // hash the collected data and compare the whole data set instead. This 464 // prevents any false-positives due to hash code collisions. 465 llvm::FoldingSetNodeID DataLHS, DataRHS; 466 FoldingSetNodeIDWrapper LHSWrapper(DataLHS); 467 FoldingSetNodeIDWrapper RHSWrapper(DataRHS); 468 469 CollectStmtSequenceData(LHS, LHSWrapper); 470 CollectStmtSequenceData(RHS, RHSWrapper); 471 472 return DataLHS == DataRHS; 473 } 474 475 void RecursiveCloneTypeIIConstraint::constrain( 476 std::vector<CloneDetector::CloneGroup> &Sequences) { 477 // FIXME: Maybe we can do this in-place and don't need this additional vector. 478 std::vector<CloneDetector::CloneGroup> Result; 479 480 for (CloneDetector::CloneGroup &Group : Sequences) { 481 // We assume in the following code that the Group is non-empty, so we 482 // skip all empty groups. 483 if (Group.empty()) 484 continue; 485 486 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash; 487 488 // Generate hash codes for all children of S and save them in StmtsByHash. 489 for (const StmtSequence &S : Group) { 490 saveHash(S.front(), S.getContainingDecl(), StmtsByHash); 491 } 492 493 // Sort hash_codes in StmtsByHash. 494 std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(), 495 [](std::pair<size_t, StmtSequence> LHS, 496 std::pair<size_t, StmtSequence> RHS) { 497 return LHS.first < RHS.first; 498 }); 499 500 // Check for each StmtSequence if its successor has the same hash value. 501 // We don't check the last StmtSequence as it has no successor. 502 // Note: The 'size - 1 ' in the condition is safe because we check for an 503 // empty Group vector at the beginning of this function. 504 for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) { 505 const auto Current = StmtsByHash[i]; 506 507 // It's likely that we just found an sequence of StmtSequences that 508 // represent a CloneGroup, so we create a new group and start checking and 509 // adding the StmtSequences in this sequence. 510 CloneDetector::CloneGroup NewGroup; 511 512 size_t PrototypeHash = Current.first; 513 514 for (; i < StmtsByHash.size(); ++i) { 515 // A different hash value means we have reached the end of the sequence. 516 if (PrototypeHash != StmtsByHash[i].first || 517 !areSequencesClones(StmtsByHash[i].second, Current.second)) { 518 // The current sequence could be the start of a new CloneGroup. So we 519 // decrement i so that we visit it again in the outer loop. 520 // Note: i can never be 0 at this point because we are just comparing 521 // the hash of the Current StmtSequence with itself in the 'if' above. 522 assert(i != 0); 523 --i; 524 break; 525 } 526 // Same hash value means we should add the StmtSequence to the current 527 // group. 528 NewGroup.push_back(StmtsByHash[i].second); 529 } 530 531 // We created a new clone group with matching hash codes and move it to 532 // the result vector. 533 Result.push_back(NewGroup); 534 } 535 } 536 // Sequences is the output parameter, so we copy our result into it. 537 Sequences = Result; 538 } 539 540 size_t MinComplexityConstraint::calculateStmtComplexity( 541 const StmtSequence &Seq, const std::string &ParentMacroStack) { 542 if (Seq.empty()) 543 return 0; 544 545 size_t Complexity = 1; 546 547 ASTContext &Context = Seq.getASTContext(); 548 549 // Look up what macros expanded into the current statement. 550 std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context); 551 std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context); 552 553 // First, check if ParentMacroStack is not empty which means we are currently 554 // dealing with a parent statement which was expanded from a macro. 555 // If this parent statement was expanded from the same macros as this 556 // statement, we reduce the initial complexity of this statement to zero. 557 // This causes that a group of statements that were generated by a single 558 // macro expansion will only increase the total complexity by one. 559 // Note: This is not the final complexity of this statement as we still 560 // add the complexity of the child statements to the complexity value. 561 if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack && 562 EndMacroStack == ParentMacroStack)) { 563 Complexity = 0; 564 } 565 566 // Iterate over the Stmts in the StmtSequence and add their complexity values 567 // to the current complexity value. 568 if (Seq.holdsSequence()) { 569 for (const Stmt *S : Seq) { 570 Complexity += calculateStmtComplexity( 571 StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); 572 } 573 } else { 574 for (const Stmt *S : Seq.front()->children()) { 575 Complexity += calculateStmtComplexity( 576 StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); 577 } 578 } 579 return Complexity; 580 } 581 582 void MatchingVariablePatternConstraint::constrain( 583 std::vector<CloneDetector::CloneGroup> &CloneGroups) { 584 CloneConstraint::splitCloneGroups( 585 CloneGroups, [](const StmtSequence &A, const StmtSequence &B) { 586 VariablePattern PatternA(A); 587 VariablePattern PatternB(B); 588 return PatternA.countPatternDifferences(PatternB) == 0; 589 }); 590 } 591 592 void CloneConstraint::splitCloneGroups( 593 std::vector<CloneDetector::CloneGroup> &CloneGroups, 594 std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) { 595 std::vector<CloneDetector::CloneGroup> Result; 596 for (auto &HashGroup : CloneGroups) { 597 // Contains all indexes in HashGroup that were already added to a 598 // CloneGroup. 599 std::vector<char> Indexes; 600 Indexes.resize(HashGroup.size()); 601 602 for (unsigned i = 0; i < HashGroup.size(); ++i) { 603 // Skip indexes that are already part of a CloneGroup. 604 if (Indexes[i]) 605 continue; 606 607 // Pick the first unhandled StmtSequence and consider it as the 608 // beginning 609 // of a new CloneGroup for now. 610 // We don't add i to Indexes because we never iterate back. 611 StmtSequence Prototype = HashGroup[i]; 612 CloneDetector::CloneGroup PotentialGroup = {Prototype}; 613 ++Indexes[i]; 614 615 // Check all following StmtSequences for clones. 616 for (unsigned j = i + 1; j < HashGroup.size(); ++j) { 617 // Skip indexes that are already part of a CloneGroup. 618 if (Indexes[j]) 619 continue; 620 621 // If a following StmtSequence belongs to our CloneGroup, we add it to 622 // it. 623 const StmtSequence &Candidate = HashGroup[j]; 624 625 if (!Compare(Prototype, Candidate)) 626 continue; 627 628 PotentialGroup.push_back(Candidate); 629 // Make sure we never visit this StmtSequence again. 630 ++Indexes[j]; 631 } 632 633 // Otherwise, add it to the result and continue searching for more 634 // groups. 635 Result.push_back(PotentialGroup); 636 } 637 638 assert(std::all_of(Indexes.begin(), Indexes.end(), 639 [](char c) { return c == 1; })); 640 } 641 CloneGroups = Result; 642 } 643 644 void VariablePattern::addVariableOccurence(const VarDecl *VarDecl, 645 const Stmt *Mention) { 646 // First check if we already reference this variable 647 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { 648 if (Variables[KindIndex] == VarDecl) { 649 // If yes, add a new occurence that points to the existing entry in 650 // the Variables vector. 651 Occurences.emplace_back(KindIndex, Mention); 652 return; 653 } 654 } 655 // If this variable wasn't already referenced, add it to the list of 656 // referenced variables and add a occurence that points to this new entry. 657 Occurences.emplace_back(Variables.size(), Mention); 658 Variables.push_back(VarDecl); 659 } 660 661 void VariablePattern::addVariables(const Stmt *S) { 662 // Sometimes we get a nullptr (such as from IfStmts which often have nullptr 663 // children). We skip such statements as they don't reference any 664 // variables. 665 if (!S) 666 return; 667 668 // Check if S is a reference to a variable. If yes, add it to the pattern. 669 if (auto D = dyn_cast<DeclRefExpr>(S)) { 670 if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl())) 671 addVariableOccurence(VD, D); 672 } 673 674 // Recursively check all children of the given statement. 675 for (const Stmt *Child : S->children()) { 676 addVariables(Child); 677 } 678 } 679 680 unsigned VariablePattern::countPatternDifferences( 681 const VariablePattern &Other, 682 VariablePattern::SuspiciousClonePair *FirstMismatch) { 683 unsigned NumberOfDifferences = 0; 684 685 assert(Other.Occurences.size() == Occurences.size()); 686 for (unsigned i = 0; i < Occurences.size(); ++i) { 687 auto ThisOccurence = Occurences[i]; 688 auto OtherOccurence = Other.Occurences[i]; 689 if (ThisOccurence.KindID == OtherOccurence.KindID) 690 continue; 691 692 ++NumberOfDifferences; 693 694 // If FirstMismatch is not a nullptr, we need to store information about 695 // the first difference between the two patterns. 696 if (FirstMismatch == nullptr) 697 continue; 698 699 // Only proceed if we just found the first difference as we only store 700 // information about the first difference. 701 if (NumberOfDifferences != 1) 702 continue; 703 704 const VarDecl *FirstSuggestion = nullptr; 705 // If there is a variable available in the list of referenced variables 706 // which wouldn't break the pattern if it is used in place of the 707 // current variable, we provide this variable as the suggested fix. 708 if (OtherOccurence.KindID < Variables.size()) 709 FirstSuggestion = Variables[OtherOccurence.KindID]; 710 711 // Store information about the first clone. 712 FirstMismatch->FirstCloneInfo = 713 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( 714 Variables[ThisOccurence.KindID], ThisOccurence.Mention, 715 FirstSuggestion); 716 717 // Same as above but with the other clone. We do this for both clones as 718 // we don't know which clone is the one containing the unintended 719 // pattern error. 720 const VarDecl *SecondSuggestion = nullptr; 721 if (ThisOccurence.KindID < Other.Variables.size()) 722 SecondSuggestion = Other.Variables[ThisOccurence.KindID]; 723 724 // Store information about the second clone. 725 FirstMismatch->SecondCloneInfo = 726 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( 727 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention, 728 SecondSuggestion); 729 730 // SuspiciousClonePair guarantees that the first clone always has a 731 // suggested variable associated with it. As we know that one of the two 732 // clones in the pair always has suggestion, we swap the two clones 733 // in case the first clone has no suggested variable which means that 734 // the second clone has a suggested variable and should be first. 735 if (!FirstMismatch->FirstCloneInfo.Suggestion) 736 std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo); 737 738 // This ensures that we always have at least one suggestion in a pair. 739 assert(FirstMismatch->FirstCloneInfo.Suggestion); 740 } 741 742 return NumberOfDifferences; 743 } 744