1 //===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// This file implements classes for searching and anlyzing source code clones. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Analysis/CloneDetection.h" 15 16 #include "clang/AST/ASTContext.h" 17 #include "clang/AST/RecursiveASTVisitor.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtVisitor.h" 20 #include "clang/Lex/Lexer.h" 21 #include "llvm/Support/MD5.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace clang; 26 27 StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D, 28 unsigned StartIndex, unsigned EndIndex) 29 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) { 30 assert(Stmt && "Stmt must not be a nullptr"); 31 assert(StartIndex < EndIndex && "Given array should not be empty"); 32 assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt"); 33 } 34 35 StmtSequence::StmtSequence(const Stmt *Stmt, const Decl *D) 36 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {} 37 38 StmtSequence::StmtSequence() 39 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {} 40 41 bool StmtSequence::contains(const StmtSequence &Other) const { 42 // If both sequences reside in different declarations, they can never contain 43 // each other. 44 if (D != Other.D) 45 return false; 46 47 const SourceManager &SM = getASTContext().getSourceManager(); 48 49 // Otherwise check if the start and end locations of the current sequence 50 // surround the other sequence. 51 bool StartIsInBounds = 52 SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) || 53 getStartLoc() == Other.getStartLoc(); 54 if (!StartIsInBounds) 55 return false; 56 57 bool EndIsInBounds = 58 SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) || 59 Other.getEndLoc() == getEndLoc(); 60 return EndIsInBounds; 61 } 62 63 StmtSequence::iterator StmtSequence::begin() const { 64 if (!holdsSequence()) { 65 return &S; 66 } 67 auto CS = cast<CompoundStmt>(S); 68 return CS->body_begin() + StartIndex; 69 } 70 71 StmtSequence::iterator StmtSequence::end() const { 72 if (!holdsSequence()) { 73 return reinterpret_cast<StmtSequence::iterator>(&S) + 1; 74 } 75 auto CS = cast<CompoundStmt>(S); 76 return CS->body_begin() + EndIndex; 77 } 78 79 ASTContext &StmtSequence::getASTContext() const { 80 assert(D); 81 return D->getASTContext(); 82 } 83 84 SourceLocation StmtSequence::getStartLoc() const { 85 return front()->getLocStart(); 86 } 87 88 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } 89 90 SourceRange StmtSequence::getSourceRange() const { 91 return SourceRange(getStartLoc(), getEndLoc()); 92 } 93 94 /// Prints the macro name that contains the given SourceLocation into the given 95 /// raw_string_ostream. 96 static void printMacroName(llvm::raw_string_ostream &MacroStack, 97 ASTContext &Context, SourceLocation Loc) { 98 MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), 99 Context.getLangOpts()); 100 101 // Add an empty space at the end as a padding to prevent 102 // that macro names concatenate to the names of other macros. 103 MacroStack << " "; 104 } 105 106 /// Returns a string that represents all macro expansions that expanded into the 107 /// given SourceLocation. 108 /// 109 /// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations 110 /// A and B are expanded from the same macros in the same order. 111 static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) { 112 std::string MacroStack; 113 llvm::raw_string_ostream MacroStackStream(MacroStack); 114 SourceManager &SM = Context.getSourceManager(); 115 116 // Iterate over all macros that expanded into the given SourceLocation. 117 while (Loc.isMacroID()) { 118 // Add the macro name to the stream. 119 printMacroName(MacroStackStream, Context, Loc); 120 Loc = SM.getImmediateMacroCallerLoc(Loc); 121 } 122 MacroStackStream.flush(); 123 return MacroStack; 124 } 125 126 namespace { 127 typedef unsigned DataPiece; 128 129 /// Collects the data of a single Stmt. 130 /// 131 /// This class defines what a code clone is: If it collects for two statements 132 /// the same data, then those two statements are considered to be clones of each 133 /// other. 134 /// 135 /// All collected data is forwarded to the given data consumer of the type T. 136 /// The data consumer class needs to provide a member method with the signature: 137 /// update(StringRef Str) 138 template <typename T> 139 class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> { 140 141 ASTContext &Context; 142 /// The data sink to which all data is forwarded. 143 T &DataConsumer; 144 145 public: 146 /// Collects data of the given Stmt. 147 /// \param S The given statement. 148 /// \param Context The ASTContext of S. 149 /// \param DataConsumer The data sink to which all data is forwarded. 150 StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer) 151 : Context(Context), DataConsumer(DataConsumer) { 152 this->Visit(S); 153 } 154 155 // Below are utility methods for appending different data to the vector. 156 157 void addData(DataPiece Integer) { 158 DataConsumer.update( 159 StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer))); 160 } 161 162 void addData(llvm::StringRef Str) { DataConsumer.update(Str); } 163 164 void addData(const QualType &QT) { addData(QT.getAsString()); } 165 166 // The functions below collect the class specific data of each Stmt subclass. 167 168 // Utility macro for defining a visit method for a given class. This method 169 // calls back to the ConstStmtVisitor to visit all parent classes. 170 #define DEF_ADD_DATA(CLASS, CODE) \ 171 void Visit##CLASS(const CLASS *S) { \ 172 CODE; \ 173 ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \ 174 } 175 176 DEF_ADD_DATA(Stmt, { 177 addData(S->getStmtClass()); 178 // This ensures that macro generated code isn't identical to macro-generated 179 // code. 180 addData(getMacroStack(S->getLocStart(), Context)); 181 addData(getMacroStack(S->getLocEnd(), Context)); 182 }) 183 DEF_ADD_DATA(Expr, { addData(S->getType()); }) 184 185 //--- Builtin functionality ----------------------------------------------// 186 DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) 187 DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) 188 DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) 189 DEF_ADD_DATA(TypeTraitExpr, { 190 addData(S->getTrait()); 191 for (unsigned i = 0; i < S->getNumArgs(); ++i) 192 addData(S->getArg(i)->getType()); 193 }) 194 195 //--- Calls --------------------------------------------------------------// 196 DEF_ADD_DATA(CallExpr, { 197 // Function pointers don't have a callee and we just skip hashing it. 198 if (const FunctionDecl *D = S->getDirectCallee()) { 199 // If the function is a template specialization, we also need to handle 200 // the template arguments as they are not included in the qualified name. 201 if (auto Args = D->getTemplateSpecializationArgs()) { 202 std::string ArgString; 203 204 // Print all template arguments into ArgString 205 llvm::raw_string_ostream OS(ArgString); 206 for (unsigned i = 0; i < Args->size(); ++i) { 207 Args->get(i).print(Context.getLangOpts(), OS); 208 // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'. 209 OS << '\n'; 210 } 211 OS.flush(); 212 213 addData(ArgString); 214 } 215 addData(D->getQualifiedNameAsString()); 216 } 217 }) 218 219 //--- Exceptions ---------------------------------------------------------// 220 DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) 221 222 //--- C++ OOP Stmts ------------------------------------------------------// 223 DEF_ADD_DATA(CXXDeleteExpr, { 224 addData(S->isArrayFormAsWritten()); 225 addData(S->isGlobalDelete()); 226 }) 227 228 //--- Casts --------------------------------------------------------------// 229 DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) 230 231 //--- Miscellaneous Exprs ------------------------------------------------// 232 DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) 233 DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) 234 235 //--- Control flow -------------------------------------------------------// 236 DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) 237 DEF_ADD_DATA(IndirectGotoStmt, { 238 if (S->getConstantTarget()) 239 addData(S->getConstantTarget()->getName()); 240 }) 241 DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) 242 DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) 243 DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) 244 245 //--- Objective-C --------------------------------------------------------// 246 DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) 247 DEF_ADD_DATA(ObjCPropertyRefExpr, { 248 addData(S->isSuperReceiver()); 249 addData(S->isImplicitProperty()); 250 }) 251 DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) 252 253 //--- Miscellaneous Stmts ------------------------------------------------// 254 DEF_ADD_DATA(CXXFoldExpr, { 255 addData(S->isRightFold()); 256 addData(S->getOperator()); 257 }) 258 DEF_ADD_DATA(GenericSelectionExpr, { 259 for (unsigned i = 0; i < S->getNumAssocs(); ++i) { 260 addData(S->getAssocType(i)); 261 } 262 }) 263 DEF_ADD_DATA(LambdaExpr, { 264 for (const LambdaCapture &C : S->captures()) { 265 addData(C.isPackExpansion()); 266 addData(C.getCaptureKind()); 267 if (C.capturesVariable()) 268 addData(C.getCapturedVar()->getType()); 269 } 270 addData(S->isGenericLambda()); 271 addData(S->isMutable()); 272 }) 273 DEF_ADD_DATA(DeclStmt, { 274 auto numDecls = std::distance(S->decl_begin(), S->decl_end()); 275 addData(static_cast<DataPiece>(numDecls)); 276 for (const Decl *D : S->decls()) { 277 if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { 278 addData(VD->getType()); 279 } 280 } 281 }) 282 DEF_ADD_DATA(AsmStmt, { 283 addData(S->isSimple()); 284 addData(S->isVolatile()); 285 addData(S->generateAsmString(Context)); 286 for (unsigned i = 0; i < S->getNumInputs(); ++i) { 287 addData(S->getInputConstraint(i)); 288 } 289 for (unsigned i = 0; i < S->getNumOutputs(); ++i) { 290 addData(S->getOutputConstraint(i)); 291 } 292 for (unsigned i = 0; i < S->getNumClobbers(); ++i) { 293 addData(S->getClobber(i)); 294 } 295 }) 296 DEF_ADD_DATA(AttributedStmt, { 297 for (const Attr *A : S->getAttrs()) { 298 addData(std::string(A->getSpelling())); 299 } 300 }) 301 }; 302 } // end anonymous namespace 303 304 void CloneDetector::analyzeCodeBody(const Decl *D) { 305 assert(D); 306 assert(D->hasBody()); 307 308 Sequences.push_back(StmtSequence(D->getBody(), D)); 309 } 310 311 /// Returns true if and only if \p Stmt contains at least one other 312 /// sequence in the \p Group. 313 static bool containsAnyInGroup(StmtSequence &Seq, 314 CloneDetector::CloneGroup &Group) { 315 for (StmtSequence &GroupSeq : Group) { 316 if (Seq.contains(GroupSeq)) 317 return true; 318 } 319 return false; 320 } 321 322 /// Returns true if and only if all sequences in \p OtherGroup are 323 /// contained by a sequence in \p Group. 324 static bool containsGroup(CloneDetector::CloneGroup &Group, 325 CloneDetector::CloneGroup &OtherGroup) { 326 // We have less sequences in the current group than we have in the other, 327 // so we will never fulfill the requirement for returning true. This is only 328 // possible because we know that a sequence in Group can contain at most 329 // one sequence in OtherGroup. 330 if (Group.size() < OtherGroup.size()) 331 return false; 332 333 for (StmtSequence &Stmt : Group) { 334 if (!containsAnyInGroup(Stmt, OtherGroup)) 335 return false; 336 } 337 return true; 338 } 339 340 void OnlyLargestCloneConstraint::constrain( 341 std::vector<CloneDetector::CloneGroup> &Result) { 342 std::vector<unsigned> IndexesToRemove; 343 344 // Compare every group in the result with the rest. If one groups contains 345 // another group, we only need to return the bigger group. 346 // Note: This doesn't scale well, so if possible avoid calling any heavy 347 // function from this loop to minimize the performance impact. 348 for (unsigned i = 0; i < Result.size(); ++i) { 349 for (unsigned j = 0; j < Result.size(); ++j) { 350 // Don't compare a group with itself. 351 if (i == j) 352 continue; 353 354 if (containsGroup(Result[j], Result[i])) { 355 IndexesToRemove.push_back(i); 356 break; 357 } 358 } 359 } 360 361 // Erasing a list of indexes from the vector should be done with decreasing 362 // indexes. As IndexesToRemove is constructed with increasing values, we just 363 // reverse iterate over it to get the desired order. 364 for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) { 365 Result.erase(Result.begin() + *I); 366 } 367 } 368 369 bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) { 370 std::string Error; 371 if (IgnoredFilesPattern.empty() || Group.empty() || 372 !IgnoredFilesRegex->isValid(Error)) 373 return false; 374 375 for (const StmtSequence &S : Group) { 376 const SourceManager &SM = S.getASTContext().getSourceManager(); 377 StringRef Filename = llvm::sys::path::filename(SM.getFilename( 378 S.getContainingDecl()->getLocation())); 379 if (IgnoredFilesRegex->match(Filename)) 380 return true; 381 } 382 383 return false; 384 } 385 386 static size_t createHash(llvm::MD5 &Hash) { 387 size_t HashCode; 388 389 // Create the final hash code for the current Stmt. 390 llvm::MD5::MD5Result HashResult; 391 Hash.final(HashResult); 392 393 // Copy as much as possible of the generated hash code to the Stmt's hash 394 // code. 395 std::memcpy(&HashCode, &HashResult, 396 std::min(sizeof(HashCode), sizeof(HashResult))); 397 398 return HashCode; 399 } 400 401 size_t RecursiveCloneTypeIIConstraint::saveHash( 402 const Stmt *S, const Decl *D, 403 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) { 404 llvm::MD5 Hash; 405 ASTContext &Context = D->getASTContext(); 406 407 StmtDataCollector<llvm::MD5>(S, Context, Hash); 408 409 auto CS = dyn_cast<CompoundStmt>(S); 410 SmallVector<size_t, 8> ChildHashes; 411 412 for (const Stmt *Child : S->children()) { 413 if (Child == nullptr) { 414 ChildHashes.push_back(0); 415 continue; 416 } 417 size_t ChildHash = saveHash(Child, D, StmtsByHash); 418 Hash.update( 419 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash))); 420 ChildHashes.push_back(ChildHash); 421 } 422 423 if (CS) { 424 for (unsigned Length = 2; Length <= CS->size(); ++Length) { 425 for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) { 426 llvm::MD5 Hash; 427 for (unsigned i = Pos; i < Pos + Length; ++i) { 428 size_t ChildHash = ChildHashes[i]; 429 Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash), 430 sizeof(ChildHash))); 431 } 432 StmtsByHash.push_back(std::make_pair( 433 createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length))); 434 } 435 } 436 } 437 438 size_t HashCode = createHash(Hash); 439 StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D))); 440 return HashCode; 441 } 442 443 namespace { 444 /// Wrapper around FoldingSetNodeID that it can be used as the template 445 /// argument of the StmtDataCollector. 446 class FoldingSetNodeIDWrapper { 447 448 llvm::FoldingSetNodeID &FS; 449 450 public: 451 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {} 452 453 void update(StringRef Str) { FS.AddString(Str); } 454 }; 455 } // end anonymous namespace 456 457 /// Writes the relevant data from all statements and child statements 458 /// in the given StmtSequence into the given FoldingSetNodeID. 459 static void CollectStmtSequenceData(const StmtSequence &Sequence, 460 FoldingSetNodeIDWrapper &OutputData) { 461 for (const Stmt *S : Sequence) { 462 StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(), 463 OutputData); 464 465 for (const Stmt *Child : S->children()) { 466 if (!Child) 467 continue; 468 469 CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()), 470 OutputData); 471 } 472 } 473 } 474 475 /// Returns true if both sequences are clones of each other. 476 static bool areSequencesClones(const StmtSequence &LHS, 477 const StmtSequence &RHS) { 478 // We collect the data from all statements in the sequence as we did before 479 // when generating a hash value for each sequence. But this time we don't 480 // hash the collected data and compare the whole data set instead. This 481 // prevents any false-positives due to hash code collisions. 482 llvm::FoldingSetNodeID DataLHS, DataRHS; 483 FoldingSetNodeIDWrapper LHSWrapper(DataLHS); 484 FoldingSetNodeIDWrapper RHSWrapper(DataRHS); 485 486 CollectStmtSequenceData(LHS, LHSWrapper); 487 CollectStmtSequenceData(RHS, RHSWrapper); 488 489 return DataLHS == DataRHS; 490 } 491 492 void RecursiveCloneTypeIIConstraint::constrain( 493 std::vector<CloneDetector::CloneGroup> &Sequences) { 494 // FIXME: Maybe we can do this in-place and don't need this additional vector. 495 std::vector<CloneDetector::CloneGroup> Result; 496 497 for (CloneDetector::CloneGroup &Group : Sequences) { 498 // We assume in the following code that the Group is non-empty, so we 499 // skip all empty groups. 500 if (Group.empty()) 501 continue; 502 503 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash; 504 505 // Generate hash codes for all children of S and save them in StmtsByHash. 506 for (const StmtSequence &S : Group) { 507 saveHash(S.front(), S.getContainingDecl(), StmtsByHash); 508 } 509 510 // Sort hash_codes in StmtsByHash. 511 std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(), 512 [](std::pair<size_t, StmtSequence> LHS, 513 std::pair<size_t, StmtSequence> RHS) { 514 return LHS.first < RHS.first; 515 }); 516 517 // Check for each StmtSequence if its successor has the same hash value. 518 // We don't check the last StmtSequence as it has no successor. 519 // Note: The 'size - 1 ' in the condition is safe because we check for an 520 // empty Group vector at the beginning of this function. 521 for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) { 522 const auto Current = StmtsByHash[i]; 523 524 // It's likely that we just found an sequence of StmtSequences that 525 // represent a CloneGroup, so we create a new group and start checking and 526 // adding the StmtSequences in this sequence. 527 CloneDetector::CloneGroup NewGroup; 528 529 size_t PrototypeHash = Current.first; 530 531 for (; i < StmtsByHash.size(); ++i) { 532 // A different hash value means we have reached the end of the sequence. 533 if (PrototypeHash != StmtsByHash[i].first || 534 !areSequencesClones(StmtsByHash[i].second, Current.second)) { 535 // The current sequence could be the start of a new CloneGroup. So we 536 // decrement i so that we visit it again in the outer loop. 537 // Note: i can never be 0 at this point because we are just comparing 538 // the hash of the Current StmtSequence with itself in the 'if' above. 539 assert(i != 0); 540 --i; 541 break; 542 } 543 // Same hash value means we should add the StmtSequence to the current 544 // group. 545 NewGroup.push_back(StmtsByHash[i].second); 546 } 547 548 // We created a new clone group with matching hash codes and move it to 549 // the result vector. 550 Result.push_back(NewGroup); 551 } 552 } 553 // Sequences is the output parameter, so we copy our result into it. 554 Sequences = Result; 555 } 556 557 size_t MinComplexityConstraint::calculateStmtComplexity( 558 const StmtSequence &Seq, const std::string &ParentMacroStack) { 559 if (Seq.empty()) 560 return 0; 561 562 size_t Complexity = 1; 563 564 ASTContext &Context = Seq.getASTContext(); 565 566 // Look up what macros expanded into the current statement. 567 std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context); 568 std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context); 569 570 // First, check if ParentMacroStack is not empty which means we are currently 571 // dealing with a parent statement which was expanded from a macro. 572 // If this parent statement was expanded from the same macros as this 573 // statement, we reduce the initial complexity of this statement to zero. 574 // This causes that a group of statements that were generated by a single 575 // macro expansion will only increase the total complexity by one. 576 // Note: This is not the final complexity of this statement as we still 577 // add the complexity of the child statements to the complexity value. 578 if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack && 579 EndMacroStack == ParentMacroStack)) { 580 Complexity = 0; 581 } 582 583 // Iterate over the Stmts in the StmtSequence and add their complexity values 584 // to the current complexity value. 585 if (Seq.holdsSequence()) { 586 for (const Stmt *S : Seq) { 587 Complexity += calculateStmtComplexity( 588 StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); 589 } 590 } else { 591 for (const Stmt *S : Seq.front()->children()) { 592 Complexity += calculateStmtComplexity( 593 StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); 594 } 595 } 596 return Complexity; 597 } 598 599 void MatchingVariablePatternConstraint::constrain( 600 std::vector<CloneDetector::CloneGroup> &CloneGroups) { 601 CloneConstraint::splitCloneGroups( 602 CloneGroups, [](const StmtSequence &A, const StmtSequence &B) { 603 VariablePattern PatternA(A); 604 VariablePattern PatternB(B); 605 return PatternA.countPatternDifferences(PatternB) == 0; 606 }); 607 } 608 609 void CloneConstraint::splitCloneGroups( 610 std::vector<CloneDetector::CloneGroup> &CloneGroups, 611 std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) { 612 std::vector<CloneDetector::CloneGroup> Result; 613 for (auto &HashGroup : CloneGroups) { 614 // Contains all indexes in HashGroup that were already added to a 615 // CloneGroup. 616 std::vector<char> Indexes; 617 Indexes.resize(HashGroup.size()); 618 619 for (unsigned i = 0; i < HashGroup.size(); ++i) { 620 // Skip indexes that are already part of a CloneGroup. 621 if (Indexes[i]) 622 continue; 623 624 // Pick the first unhandled StmtSequence and consider it as the 625 // beginning 626 // of a new CloneGroup for now. 627 // We don't add i to Indexes because we never iterate back. 628 StmtSequence Prototype = HashGroup[i]; 629 CloneDetector::CloneGroup PotentialGroup = {Prototype}; 630 ++Indexes[i]; 631 632 // Check all following StmtSequences for clones. 633 for (unsigned j = i + 1; j < HashGroup.size(); ++j) { 634 // Skip indexes that are already part of a CloneGroup. 635 if (Indexes[j]) 636 continue; 637 638 // If a following StmtSequence belongs to our CloneGroup, we add it. 639 const StmtSequence &Candidate = HashGroup[j]; 640 641 if (!Compare(Prototype, Candidate)) 642 continue; 643 644 PotentialGroup.push_back(Candidate); 645 // Make sure we never visit this StmtSequence again. 646 ++Indexes[j]; 647 } 648 649 // Otherwise, add it to the result and continue searching for more 650 // groups. 651 Result.push_back(PotentialGroup); 652 } 653 654 assert(std::all_of(Indexes.begin(), Indexes.end(), 655 [](char c) { return c == 1; })); 656 } 657 CloneGroups = Result; 658 } 659 660 void VariablePattern::addVariableOccurence(const VarDecl *VarDecl, 661 const Stmt *Mention) { 662 // First check if we already reference this variable 663 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { 664 if (Variables[KindIndex] == VarDecl) { 665 // If yes, add a new occurence that points to the existing entry in 666 // the Variables vector. 667 Occurences.emplace_back(KindIndex, Mention); 668 return; 669 } 670 } 671 // If this variable wasn't already referenced, add it to the list of 672 // referenced variables and add a occurence that points to this new entry. 673 Occurences.emplace_back(Variables.size(), Mention); 674 Variables.push_back(VarDecl); 675 } 676 677 void VariablePattern::addVariables(const Stmt *S) { 678 // Sometimes we get a nullptr (such as from IfStmts which often have nullptr 679 // children). We skip such statements as they don't reference any 680 // variables. 681 if (!S) 682 return; 683 684 // Check if S is a reference to a variable. If yes, add it to the pattern. 685 if (auto D = dyn_cast<DeclRefExpr>(S)) { 686 if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl())) 687 addVariableOccurence(VD, D); 688 } 689 690 // Recursively check all children of the given statement. 691 for (const Stmt *Child : S->children()) { 692 addVariables(Child); 693 } 694 } 695 696 unsigned VariablePattern::countPatternDifferences( 697 const VariablePattern &Other, 698 VariablePattern::SuspiciousClonePair *FirstMismatch) { 699 unsigned NumberOfDifferences = 0; 700 701 assert(Other.Occurences.size() == Occurences.size()); 702 for (unsigned i = 0; i < Occurences.size(); ++i) { 703 auto ThisOccurence = Occurences[i]; 704 auto OtherOccurence = Other.Occurences[i]; 705 if (ThisOccurence.KindID == OtherOccurence.KindID) 706 continue; 707 708 ++NumberOfDifferences; 709 710 // If FirstMismatch is not a nullptr, we need to store information about 711 // the first difference between the two patterns. 712 if (FirstMismatch == nullptr) 713 continue; 714 715 // Only proceed if we just found the first difference as we only store 716 // information about the first difference. 717 if (NumberOfDifferences != 1) 718 continue; 719 720 const VarDecl *FirstSuggestion = nullptr; 721 // If there is a variable available in the list of referenced variables 722 // which wouldn't break the pattern if it is used in place of the 723 // current variable, we provide this variable as the suggested fix. 724 if (OtherOccurence.KindID < Variables.size()) 725 FirstSuggestion = Variables[OtherOccurence.KindID]; 726 727 // Store information about the first clone. 728 FirstMismatch->FirstCloneInfo = 729 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( 730 Variables[ThisOccurence.KindID], ThisOccurence.Mention, 731 FirstSuggestion); 732 733 // Same as above but with the other clone. We do this for both clones as 734 // we don't know which clone is the one containing the unintended 735 // pattern error. 736 const VarDecl *SecondSuggestion = nullptr; 737 if (ThisOccurence.KindID < Other.Variables.size()) 738 SecondSuggestion = Other.Variables[ThisOccurence.KindID]; 739 740 // Store information about the second clone. 741 FirstMismatch->SecondCloneInfo = 742 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( 743 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention, 744 SecondSuggestion); 745 746 // SuspiciousClonePair guarantees that the first clone always has a 747 // suggested variable associated with it. As we know that one of the two 748 // clones in the pair always has suggestion, we swap the two clones 749 // in case the first clone has no suggested variable which means that 750 // the second clone has a suggested variable and should be first. 751 if (!FirstMismatch->FirstCloneInfo.Suggestion) 752 std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo); 753 754 // This ensures that we always have at least one suggestion in a pair. 755 assert(FirstMismatch->FirstCloneInfo.Suggestion); 756 } 757 758 return NumberOfDifferences; 759 } 760