1 //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains definitons for the AST differencing interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Tooling/ASTDiff/ASTDiff.h" 15 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/Lex/Lexer.h" 18 #include "llvm/ADT/PriorityQueue.h" 19 20 #include <limits> 21 #include <memory> 22 #include <unordered_set> 23 24 using namespace llvm; 25 using namespace clang; 26 27 namespace clang { 28 namespace diff { 29 30 class ASTDiff::Impl { 31 public: 32 SyntaxTreeImpl &T1, &T2; 33 bool IsMappingDone = false; 34 Mapping TheMapping; 35 36 Impl(SyntaxTreeImpl &T1, SyntaxTreeImpl &T2, const ComparisonOptions &Options) 37 : T1(T1), T2(T2), Options(Options) {} 38 39 /// Matches nodes one-by-one based on their similarity. 40 void computeMapping(); 41 42 std::vector<Match> getMatches(Mapping &M); 43 44 /// Finds an edit script that converts T1 to T2. 45 std::vector<Change> computeChanges(Mapping &M); 46 47 void printChangeImpl(raw_ostream &OS, const Change &Chg) const; 48 void printMatchImpl(raw_ostream &OS, const Match &M) const; 49 50 // Returns a mapping of isomorphic subtrees. 51 Mapping matchTopDown() const; 52 53 private: 54 // Returns true if the two subtrees are identical. 55 bool isomorphic(NodeId Id1, NodeId Id2) const; 56 57 bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const; 58 59 // Returns false if the nodes must not be mached. 60 bool isMatchingPossible(NodeId Id1, NodeId Id2) const; 61 62 // Adds all corresponding subtrees of the two nodes to the mapping. 63 // The two nodes must be isomorphic. 64 void addIsomorphicSubTrees(Mapping &M, NodeId Id1, NodeId Id2) const; 65 66 // Uses an optimal albeit slow algorithm to compute a mapping between two 67 // subtrees, but only if both have fewer nodes than MaxSize. 68 void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const; 69 70 // Computes the ratio of common descendants between the two nodes. 71 // Descendants are only considered to be equal when they are mapped in M. 72 double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; 73 74 // Returns the node that has the highest degree of similarity. 75 NodeId findCandidate(const Mapping &M, NodeId Id1) const; 76 77 // Tries to match any yet unmapped nodes, in a bottom-up fashion. 78 void matchBottomUp(Mapping &M) const; 79 80 const ComparisonOptions &Options; 81 82 friend class ZhangShashaMatcher; 83 }; 84 85 template <class T> 86 static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { 87 if (!N) 88 return true; 89 SourceLocation SLoc = N->getLocStart(); 90 return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc); 91 } 92 93 namespace { 94 /// Counts the number of nodes that will be compared. 95 struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> { 96 int Count = 0; 97 const SyntaxTreeImpl &Root; 98 NodeCountVisitor(const SyntaxTreeImpl &Root) : Root(Root) {} 99 bool TraverseDecl(Decl *D) { 100 if (isNodeExcluded(Root.AST.getSourceManager(), D)) 101 return true; 102 ++Count; 103 RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D); 104 return true; 105 } 106 bool TraverseStmt(Stmt *S) { 107 if (isNodeExcluded(Root.AST.getSourceManager(), S)) 108 return true; 109 ++Count; 110 RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S); 111 return true; 112 } 113 bool TraverseType(QualType T) { return true; } 114 }; 115 } // end anonymous namespace 116 117 namespace { 118 // Sets Height, Parent and Children for each node. 119 struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> { 120 int Id = 0, Depth = 0; 121 NodeId Parent; 122 SyntaxTreeImpl &Root; 123 124 PreorderVisitor(SyntaxTreeImpl &Root) : Root(Root) {} 125 126 template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { 127 NodeId MyId = Id; 128 Node &N = Root.getMutableNode(MyId); 129 N.Parent = Parent; 130 N.Depth = Depth; 131 N.ASTNode = DynTypedNode::create(*ASTNode); 132 assert(!N.ASTNode.getNodeKind().isNone() && 133 "Expected nodes to have a valid kind."); 134 if (Parent.isValid()) { 135 Node &P = Root.getMutableNode(Parent); 136 P.Children.push_back(MyId); 137 } 138 Parent = MyId; 139 ++Id; 140 ++Depth; 141 return std::make_tuple(MyId, Root.getNode(MyId).Parent); 142 } 143 void PostTraverse(std::tuple<NodeId, NodeId> State) { 144 NodeId MyId, PreviousParent; 145 std::tie(MyId, PreviousParent) = State; 146 assert(MyId.isValid() && "Expecting to only traverse valid nodes."); 147 Parent = PreviousParent; 148 --Depth; 149 Node &N = Root.getMutableNode(MyId); 150 N.RightMostDescendant = Id; 151 if (N.isLeaf()) 152 Root.Leaves.push_back(MyId); 153 N.Height = 1; 154 for (NodeId Child : N.Children) 155 N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height); 156 } 157 bool TraverseDecl(Decl *D) { 158 if (isNodeExcluded(Root.AST.getSourceManager(), D)) 159 return true; 160 auto SavedState = PreTraverse(D); 161 RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D); 162 PostTraverse(SavedState); 163 return true; 164 } 165 bool TraverseStmt(Stmt *S) { 166 if (isNodeExcluded(Root.AST.getSourceManager(), S)) 167 return true; 168 auto SavedState = PreTraverse(S); 169 RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S); 170 PostTraverse(SavedState); 171 return true; 172 } 173 bool TraverseType(QualType T) { return true; } 174 }; 175 } // end anonymous namespace 176 177 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, const ASTContext &AST) 178 : SyntaxTreeImpl(Parent, AST.getTranslationUnitDecl(), AST) {} 179 180 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Decl *N, 181 const ASTContext &AST) 182 : Parent(Parent), AST(AST) { 183 NodeCountVisitor NodeCounter(*this); 184 NodeCounter.TraverseDecl(N); 185 Nodes.resize(NodeCounter.Count); 186 PreorderVisitor PreorderWalker(*this); 187 PreorderWalker.TraverseDecl(N); 188 initTree(); 189 } 190 191 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Stmt *N, 192 const ASTContext &AST) 193 : Parent(Parent), AST(AST) { 194 NodeCountVisitor NodeCounter(*this); 195 NodeCounter.TraverseStmt(N); 196 Nodes.resize(NodeCounter.Count); 197 PreorderVisitor PreorderWalker(*this); 198 PreorderWalker.TraverseStmt(N); 199 initTree(); 200 } 201 202 void SyntaxTreeImpl::initTree() { 203 setLeftMostDescendants(); 204 int PostorderId = 0; 205 PostorderIds.resize(getSize()); 206 std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) { 207 for (NodeId Child : getNode(Id).Children) 208 PostorderTraverse(Child); 209 PostorderIds[Id] = PostorderId; 210 ++PostorderId; 211 }; 212 PostorderTraverse(root()); 213 } 214 215 void SyntaxTreeImpl::setLeftMostDescendants() { 216 for (NodeId Leaf : Leaves) { 217 getMutableNode(Leaf).LeftMostDescendant = Leaf; 218 NodeId Parent, Cur = Leaf; 219 while ((Parent = getNode(Cur).Parent).isValid() && 220 getNode(Parent).Children[0] == Cur) { 221 Cur = Parent; 222 getMutableNode(Cur).LeftMostDescendant = Leaf; 223 } 224 } 225 } 226 227 static std::vector<NodeId> getSubtreePostorder(const SyntaxTreeImpl &Tree, 228 NodeId Root) { 229 std::vector<NodeId> Postorder; 230 std::function<void(NodeId)> Traverse = [&](NodeId Id) { 231 const Node &N = Tree.getNode(Id); 232 for (NodeId Child : N.Children) 233 Traverse(Child); 234 Postorder.push_back(Id); 235 }; 236 Traverse(Root); 237 return Postorder; 238 } 239 240 static std::vector<NodeId> getSubtreeBfs(const SyntaxTreeImpl &Tree, 241 NodeId Root) { 242 std::vector<NodeId> Ids; 243 size_t Expanded = 0; 244 Ids.push_back(Root); 245 while (Expanded < Ids.size()) 246 for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) 247 Ids.push_back(Child); 248 return Ids; 249 } 250 251 int SyntaxTreeImpl::getNumberOfDescendants(NodeId Id) const { 252 return getNode(Id).RightMostDescendant - Id + 1; 253 } 254 255 bool SyntaxTreeImpl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { 256 NodeId Lower = SubtreeRoot; 257 NodeId Upper = getNode(SubtreeRoot).RightMostDescendant; 258 return Id >= Lower && Id <= Upper; 259 } 260 261 std::string SyntaxTreeImpl::getNodeValueImpl(NodeId Id) const { 262 return getNodeValueImpl(getNode(Id).ASTNode); 263 } 264 265 std::string SyntaxTreeImpl::getNodeValueImpl(const DynTypedNode &DTN) const { 266 if (auto *X = DTN.get<BinaryOperator>()) 267 return X->getOpcodeStr(); 268 if (auto *X = DTN.get<AccessSpecDecl>()) { 269 CharSourceRange Range(X->getSourceRange(), false); 270 return Lexer::getSourceText(Range, AST.getSourceManager(), 271 AST.getLangOpts()); 272 } 273 if (auto *X = DTN.get<IntegerLiteral>()) { 274 SmallString<256> Str; 275 X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); 276 return Str.str(); 277 } 278 if (auto *X = DTN.get<StringLiteral>()) 279 return X->getString(); 280 if (auto *X = DTN.get<ValueDecl>()) 281 return X->getNameAsString() + "(" + X->getType().getAsString() + ")"; 282 if (DTN.get<DeclStmt>() || DTN.get<TranslationUnitDecl>()) 283 return ""; 284 std::string Value; 285 if (auto *X = DTN.get<DeclRefExpr>()) { 286 if (X->hasQualifier()) { 287 llvm::raw_string_ostream OS(Value); 288 PrintingPolicy PP(AST.getLangOpts()); 289 X->getQualifier()->print(OS, PP); 290 } 291 Value += X->getDecl()->getNameAsString(); 292 return Value; 293 } 294 if (auto *X = DTN.get<NamedDecl>()) 295 Value += X->getNameAsString() + ";"; 296 if (auto *X = DTN.get<TypedefNameDecl>()) 297 return Value + X->getUnderlyingType().getAsString() + ";"; 298 if (DTN.get<NamespaceDecl>()) 299 return Value; 300 if (auto *X = DTN.get<TypeDecl>()) 301 if (X->getTypeForDecl()) 302 Value += 303 X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";"; 304 if (DTN.get<Decl>()) 305 return Value; 306 if (DTN.get<Stmt>()) 307 return ""; 308 llvm_unreachable("Fatal: unhandled AST node.\n"); 309 } 310 311 void SyntaxTreeImpl::printTree() const { printTree(root()); } 312 void SyntaxTreeImpl::printTree(NodeId Root) const { 313 printTree(llvm::outs(), Root); 314 } 315 316 void SyntaxTreeImpl::printTree(raw_ostream &OS, NodeId Root) const { 317 const Node &N = getNode(Root); 318 for (int I = 0; I < N.Depth; ++I) 319 OS << " "; 320 printNode(OS, Root); 321 OS << "\n"; 322 for (NodeId Child : N.Children) 323 printTree(OS, Child); 324 } 325 326 void SyntaxTreeImpl::printNode(raw_ostream &OS, NodeId Id) const { 327 if (Id.isInvalid()) { 328 OS << "None"; 329 return; 330 } 331 OS << getNode(Id).getTypeLabel(); 332 if (getNodeValueImpl(Id) != "") 333 OS << ": " << getNodeValueImpl(Id); 334 OS << "(" << PostorderIds[Id] << ")"; 335 } 336 337 void SyntaxTreeImpl::printNodeAsJson(raw_ostream &OS, NodeId Id) const { 338 auto N = getNode(Id); 339 OS << R"({"type":")" << N.getTypeLabel() << R"(")"; 340 if (getNodeValueImpl(Id) != "") 341 OS << R"(,"value":")" << getNodeValueImpl(Id) << R"(")"; 342 OS << R"(,"children":[)"; 343 if (N.Children.size() > 0) { 344 printNodeAsJson(OS, N.Children[0]); 345 for (size_t I = 1, E = N.Children.size(); I < E; ++I) { 346 OS << ","; 347 printNodeAsJson(OS, N.Children[I]); 348 } 349 } 350 OS << "]}"; 351 } 352 353 void SyntaxTreeImpl::printAsJsonImpl(raw_ostream &OS) const { 354 OS << R"({"root":)"; 355 printNodeAsJson(OS, root()); 356 OS << "}\n"; 357 } 358 359 /// Identifies a node in a subtree by its postorder offset, starting at 1. 360 struct SNodeId { 361 int Id = 0; 362 363 explicit SNodeId(int Id) : Id(Id) {} 364 explicit SNodeId() = default; 365 366 operator int() const { return Id; } 367 SNodeId &operator++() { return ++Id, *this; } 368 SNodeId &operator--() { return --Id, *this; } 369 SNodeId operator+(int Other) const { return SNodeId(Id + Other); } 370 }; 371 372 class Subtree { 373 private: 374 /// The parent tree. 375 const SyntaxTreeImpl &Tree; 376 /// Maps SNodeIds to original ids. 377 std::vector<NodeId> RootIds; 378 /// Maps subtree nodes to their leftmost descendants wtihin the subtree. 379 std::vector<SNodeId> LeftMostDescendants; 380 381 public: 382 std::vector<SNodeId> KeyRoots; 383 384 Subtree(const SyntaxTreeImpl &Tree, NodeId SubtreeRoot) : Tree(Tree) { 385 RootIds = getSubtreePostorder(Tree, SubtreeRoot); 386 int NumLeaves = setLeftMostDescendants(); 387 computeKeyRoots(NumLeaves); 388 } 389 int getSize() const { return RootIds.size(); } 390 NodeId getIdInRoot(SNodeId Id) const { 391 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); 392 return RootIds[Id - 1]; 393 } 394 const Node &getNode(SNodeId Id) const { 395 return Tree.getNode(getIdInRoot(Id)); 396 } 397 SNodeId getLeftMostDescendant(SNodeId Id) const { 398 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); 399 return LeftMostDescendants[Id - 1]; 400 } 401 /// Returns the postorder index of the leftmost descendant in the subtree. 402 NodeId getPostorderOffset() const { 403 return Tree.PostorderIds[getIdInRoot(SNodeId(1))]; 404 } 405 406 private: 407 /// Returns the number of leafs in the subtree. 408 int setLeftMostDescendants() { 409 int NumLeaves = 0; 410 LeftMostDescendants.resize(getSize()); 411 for (int I = 0; I < getSize(); ++I) { 412 SNodeId SI(I + 1); 413 const Node &N = getNode(SI); 414 NumLeaves += N.isLeaf(); 415 assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() && 416 "Postorder traversal in subtree should correspond to traversal in " 417 "the root tree by a constant offset."); 418 LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] - 419 getPostorderOffset()); 420 } 421 return NumLeaves; 422 } 423 void computeKeyRoots(int Leaves) { 424 KeyRoots.resize(Leaves); 425 std::unordered_set<int> Visited; 426 int K = Leaves - 1; 427 for (SNodeId I(getSize()); I > 0; --I) { 428 SNodeId LeftDesc = getLeftMostDescendant(I); 429 if (Visited.count(LeftDesc)) 430 continue; 431 assert(K >= 0 && "K should be non-negative"); 432 KeyRoots[K] = I; 433 Visited.insert(LeftDesc); 434 --K; 435 } 436 } 437 }; 438 439 /// Implementation of Zhang and Shasha's Algorithm for tree edit distance. 440 /// Computes an optimal mapping between two trees using only insertion, 441 /// deletion and update as edit actions (similar to the Levenshtein distance). 442 class ZhangShashaMatcher { 443 const ASTDiff::Impl &DiffImpl; 444 Subtree S1; 445 Subtree S2; 446 std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist; 447 448 public: 449 ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTreeImpl &T1, 450 const SyntaxTreeImpl &T2, NodeId Id1, NodeId Id2) 451 : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) { 452 TreeDist = llvm::make_unique<std::unique_ptr<double[]>[]>( 453 size_t(S1.getSize()) + 1); 454 ForestDist = llvm::make_unique<std::unique_ptr<double[]>[]>( 455 size_t(S1.getSize()) + 1); 456 for (int I = 0, E = S1.getSize() + 1; I < E; ++I) { 457 TreeDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1); 458 ForestDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1); 459 } 460 } 461 462 std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() { 463 std::vector<std::pair<NodeId, NodeId>> Matches; 464 std::vector<std::pair<SNodeId, SNodeId>> TreePairs; 465 466 computeTreeDist(); 467 468 bool RootNodePair = true; 469 470 TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize())); 471 472 while (!TreePairs.empty()) { 473 SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col; 474 std::tie(LastRow, LastCol) = TreePairs.back(); 475 TreePairs.pop_back(); 476 477 if (!RootNodePair) { 478 computeForestDist(LastRow, LastCol); 479 } 480 481 RootNodePair = false; 482 483 FirstRow = S1.getLeftMostDescendant(LastRow); 484 FirstCol = S2.getLeftMostDescendant(LastCol); 485 486 Row = LastRow; 487 Col = LastCol; 488 489 while (Row > FirstRow || Col > FirstCol) { 490 if (Row > FirstRow && 491 ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) { 492 --Row; 493 } else if (Col > FirstCol && 494 ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) { 495 --Col; 496 } else { 497 SNodeId LMD1 = S1.getLeftMostDescendant(Row); 498 SNodeId LMD2 = S2.getLeftMostDescendant(Col); 499 if (LMD1 == S1.getLeftMostDescendant(LastRow) && 500 LMD2 == S2.getLeftMostDescendant(LastCol)) { 501 NodeId Id1 = S1.getIdInRoot(Row); 502 NodeId Id2 = S2.getIdInRoot(Col); 503 assert(DiffImpl.isMatchingPossible(Id1, Id2) && 504 "These nodes must not be matched."); 505 Matches.emplace_back(Id1, Id2); 506 --Row; 507 --Col; 508 } else { 509 TreePairs.emplace_back(Row, Col); 510 Row = LMD1; 511 Col = LMD2; 512 } 513 } 514 } 515 } 516 return Matches; 517 } 518 519 private: 520 /// Simple cost model for edit actions. 521 /// The values range between 0 and 1, or infinity if this edit action should 522 /// always be avoided. 523 524 /// These costs could be modified to better model the estimated cost of / 525 /// inserting / deleting the current node. 526 static constexpr double DeletionCost = 1; 527 static constexpr double InsertionCost = 1; 528 529 double getUpdateCost(SNodeId Id1, SNodeId Id2) { 530 const DynTypedNode &DTN1 = S1.getNode(Id1).ASTNode, 531 &DTN2 = S2.getNode(Id2).ASTNode; 532 if (!DiffImpl.Options.isMatchingAllowed(DTN1, DTN2)) 533 return std::numeric_limits<double>::max(); 534 return DiffImpl.Options.getNodeDistance(*DiffImpl.T1.Parent, DTN1, 535 *DiffImpl.T2.Parent, DTN2); 536 } 537 538 void computeTreeDist() { 539 for (SNodeId Id1 : S1.KeyRoots) 540 for (SNodeId Id2 : S2.KeyRoots) 541 computeForestDist(Id1, Id2); 542 } 543 544 void computeForestDist(SNodeId Id1, SNodeId Id2) { 545 assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0."); 546 SNodeId LMD1 = S1.getLeftMostDescendant(Id1); 547 SNodeId LMD2 = S2.getLeftMostDescendant(Id2); 548 549 ForestDist[LMD1][LMD2] = 0; 550 for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) { 551 ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost; 552 for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) { 553 ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost; 554 SNodeId DLMD1 = S1.getLeftMostDescendant(D1); 555 SNodeId DLMD2 = S2.getLeftMostDescendant(D2); 556 if (DLMD1 == LMD1 && DLMD2 == LMD2) { 557 double UpdateCost = getUpdateCost(D1, D2); 558 ForestDist[D1][D2] = 559 std::min({ForestDist[D1 - 1][D2] + DeletionCost, 560 ForestDist[D1][D2 - 1] + InsertionCost, 561 ForestDist[D1 - 1][D2 - 1] + UpdateCost}); 562 TreeDist[D1][D2] = ForestDist[D1][D2]; 563 } else { 564 ForestDist[D1][D2] = 565 std::min({ForestDist[D1 - 1][D2] + DeletionCost, 566 ForestDist[D1][D2 - 1] + InsertionCost, 567 ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]}); 568 } 569 } 570 } 571 } 572 }; 573 574 namespace { 575 // Compares nodes by their depth. 576 struct HeightLess { 577 const SyntaxTreeImpl &Tree; 578 HeightLess(const SyntaxTreeImpl &Tree) : Tree(Tree) {} 579 bool operator()(NodeId Id1, NodeId Id2) const { 580 return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height; 581 } 582 }; 583 } // end anonymous namespace 584 585 // Priority queue for nodes, sorted descendingly by their height. 586 class PriorityList { 587 const SyntaxTreeImpl &Tree; 588 HeightLess Cmp; 589 std::vector<NodeId> Container; 590 PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List; 591 592 public: 593 PriorityList(const SyntaxTreeImpl &Tree) 594 : Tree(Tree), Cmp(Tree), List(Cmp, Container) {} 595 596 void push(NodeId id) { List.push(id); } 597 598 std::vector<NodeId> pop() { 599 int Max = peekMax(); 600 std::vector<NodeId> Result; 601 if (Max == 0) 602 return Result; 603 while (peekMax() == Max) { 604 Result.push_back(List.top()); 605 List.pop(); 606 } 607 // TODO this is here to get a stable output, not a good heuristic 608 std::sort(Result.begin(), Result.end()); 609 return Result; 610 } 611 int peekMax() const { 612 if (List.empty()) 613 return 0; 614 return Tree.getNode(List.top()).Height; 615 } 616 void open(NodeId Id) { 617 for (NodeId Child : Tree.getNode(Id).Children) 618 push(Child); 619 } 620 }; 621 622 bool ASTDiff::Impl::isomorphic(NodeId Id1, NodeId Id2) const { 623 const Node &N1 = T1.getNode(Id1); 624 const Node &N2 = T2.getNode(Id2); 625 if (N1.Children.size() != N2.Children.size() || 626 !isMatchingPossible(Id1, Id2) || 627 Options.getNodeDistance(*T1.Parent, N1.ASTNode, *T2.Parent, N2.ASTNode) != 628 0) 629 return false; 630 for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) 631 if (!isomorphic(N1.Children[Id], N2.Children[Id])) 632 return false; 633 return true; 634 } 635 636 bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1, 637 NodeId Id2) const { 638 assert(isMatchingPossible(Id1, Id2) && 639 "Matching must be possible in the first place."); 640 if (M.hasSrcDst(Id1, Id2)) 641 return false; 642 if (Options.EnableMatchingWithUnmatchableParents) 643 return true; 644 const Node &N1 = T1.getNode(Id1); 645 const Node &N2 = T2.getNode(Id2); 646 NodeId P1 = N1.Parent; 647 NodeId P2 = N2.Parent; 648 // Only allow matching if parents can be matched. 649 return (P1.isInvalid() && P2.isInvalid()) || 650 (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2)); 651 } 652 653 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { 654 return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode, 655 T2.getNode(Id2).ASTNode); 656 } 657 658 void ASTDiff::Impl::addIsomorphicSubTrees(Mapping &M, NodeId Id1, 659 NodeId Id2) const { 660 assert(isomorphic(Id1, Id2) && "Can only be called on isomorphic subtrees."); 661 M.link(Id1, Id2); 662 const Node &N1 = T1.getNode(Id1); 663 const Node &N2 = T2.getNode(Id2); 664 for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) 665 addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]); 666 } 667 668 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, 669 NodeId Id2) const { 670 if (std::max(T1.getNumberOfDescendants(Id1), 671 T2.getNumberOfDescendants(Id2)) >= Options.MaxSize) 672 return; 673 ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); 674 std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes(); 675 for (const auto Tuple : R) { 676 NodeId Src = Tuple.first; 677 NodeId Dst = Tuple.second; 678 if (canBeAddedToMapping(M, Src, Dst)) 679 M.link(Src, Dst); 680 } 681 } 682 683 double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, 684 NodeId Id2) const { 685 if (Id1.isInvalid() || Id2.isInvalid()) 686 return 0.0; 687 int CommonDescendants = 0; 688 const Node &N1 = T1.getNode(Id1); 689 for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) 690 CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); 691 return 2.0 * CommonDescendants / 692 (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); 693 } 694 695 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { 696 NodeId Candidate; 697 double MaxSimilarity = 0.0; 698 for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) { 699 if (!isMatchingPossible(Id1, Id2)) 700 continue; 701 if (M.hasDst(Id2)) 702 continue; 703 double Similarity = getSimilarity(M, Id1, Id2); 704 if (Similarity > MaxSimilarity) { 705 MaxSimilarity = Similarity; 706 Candidate = Id2; 707 } 708 } 709 return Candidate; 710 } 711 712 void ASTDiff::Impl::matchBottomUp(Mapping &M) const { 713 std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.root()); 714 for (NodeId Id1 : Postorder) { 715 if (Id1 == T1.root()) { 716 if (isMatchingPossible(T1.root(), T2.root())) { 717 M.link(T1.root(), T2.root()); 718 addOptimalMapping(M, T1.root(), T2.root()); 719 } 720 break; 721 } 722 const Node &N1 = T1.getNode(Id1); 723 bool Matched = M.hasSrc(Id1); 724 bool MatchedChildren = 725 std::any_of(N1.Children.begin(), N1.Children.end(), 726 [&](NodeId Child) { return M.hasSrc(Child); }); 727 if (Matched || !MatchedChildren) 728 continue; 729 NodeId Id2 = findCandidate(M, Id1); 730 if (Id2.isInvalid() || !canBeAddedToMapping(M, Id1, Id2) || 731 getSimilarity(M, Id1, Id2) < Options.MinSimilarity) 732 continue; 733 M.link(Id1, Id2); 734 addOptimalMapping(M, Id1, Id2); 735 } 736 } 737 738 Mapping ASTDiff::Impl::matchTopDown() const { 739 PriorityList L1(T1); 740 PriorityList L2(T2); 741 742 Mapping M(T1.getSize(), T2.getSize()); 743 744 L1.push(T1.root()); 745 L2.push(T2.root()); 746 747 int Max1, Max2; 748 while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) > 749 Options.MinHeight) { 750 if (Max1 > Max2) { 751 for (NodeId Id : L1.pop()) 752 L1.open(Id); 753 continue; 754 } 755 if (Max2 > Max1) { 756 for (NodeId Id : L2.pop()) 757 L2.open(Id); 758 continue; 759 } 760 std::vector<NodeId> H1, H2; 761 H1 = L1.pop(); 762 H2 = L2.pop(); 763 for (NodeId Id1 : H1) { 764 for (NodeId Id2 : H2) 765 if (isomorphic(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2)) 766 addIsomorphicSubTrees(M, Id1, Id2); 767 } 768 for (NodeId Id1 : H1) { 769 if (!M.hasSrc(Id1)) 770 L1.open(Id1); 771 } 772 for (NodeId Id2 : H2) { 773 if (!M.hasDst(Id2)) 774 L2.open(Id2); 775 } 776 } 777 return M; 778 } 779 780 void ASTDiff::Impl::computeMapping() { 781 if (IsMappingDone) 782 return; 783 TheMapping = matchTopDown(); 784 matchBottomUp(TheMapping); 785 IsMappingDone = true; 786 } 787 788 std::vector<Match> ASTDiff::Impl::getMatches(Mapping &M) { 789 std::vector<Match> Matches; 790 for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1) 791 if ((Id2 = M.getDst(Id1)).isValid()) 792 Matches.push_back({Id1, Id2}); 793 return Matches; 794 } 795 796 std::vector<Change> ASTDiff::Impl::computeChanges(Mapping &M) { 797 std::vector<Change> Changes; 798 for (NodeId Id2 : getSubtreeBfs(T2, T2.root())) { 799 const Node &N2 = T2.getNode(Id2); 800 NodeId Id1 = M.getSrc(Id2); 801 if (Id1.isValid()) { 802 assert(isMatchingPossible(Id1, Id2) && "Invalid matching."); 803 if (T1.getNodeValueImpl(Id1) != T2.getNodeValueImpl(Id2)) { 804 Changes.emplace_back(Update, Id1, Id2); 805 } 806 continue; 807 } 808 NodeId P2 = N2.Parent; 809 NodeId P1 = M.getSrc(P2); 810 assert(P1.isValid() && 811 "Parents must be matched for determining the change type."); 812 Node &Parent1 = T1.getMutableNode(P1); 813 const Node &Parent2 = T2.getNode(P2); 814 auto &Siblings1 = Parent1.Children; 815 const auto &Siblings2 = Parent2.Children; 816 size_t Position; 817 for (Position = 0; Position < Siblings2.size(); ++Position) 818 if (Siblings2[Position] == Id2 || Position >= Siblings1.size()) 819 break; 820 Changes.emplace_back(Insert, Id2, P2, Position); 821 Node PatchNode; 822 PatchNode.Parent = P1; 823 PatchNode.LeftMostDescendant = N2.LeftMostDescendant; 824 PatchNode.RightMostDescendant = N2.RightMostDescendant; 825 PatchNode.Depth = N2.Depth; 826 PatchNode.ASTNode = N2.ASTNode; 827 // TODO update Depth if needed 828 NodeId PatchNodeId = T1.getSize(); 829 // TODO maybe choose a different data structure for Children. 830 Siblings1.insert(Siblings1.begin() + Position, PatchNodeId); 831 T1.addNode(PatchNode); 832 M.link(PatchNodeId, Id2); 833 } 834 for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) { 835 NodeId Id2 = M.getDst(Id1); 836 if (Id2.isInvalid()) 837 Changes.emplace_back(Delete, Id1, Id2); 838 } 839 return Changes; 840 } 841 842 void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const { 843 switch (Chg.Kind) { 844 case Delete: 845 OS << "Delete "; 846 T1.printNode(OS, Chg.Src); 847 OS << "\n"; 848 break; 849 case Update: 850 OS << "Update "; 851 T1.printNode(OS, Chg.Src); 852 OS << " to " << T2.getNodeValueImpl(Chg.Dst) << "\n"; 853 break; 854 case Insert: 855 OS << "Insert "; 856 T2.printNode(OS, Chg.Src); 857 OS << " into "; 858 T2.printNode(OS, Chg.Dst); 859 OS << " at " << Chg.Position << "\n"; 860 break; 861 case Move: 862 llvm_unreachable("TODO"); 863 break; 864 }; 865 } 866 867 void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const { 868 OS << "Match "; 869 T1.printNode(OS, M.Src); 870 OS << " to "; 871 T2.printNode(OS, M.Dst); 872 OS << "\n"; 873 } 874 875 ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, 876 const ComparisonOptions &Options) 877 : DiffImpl(llvm::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {} 878 879 ASTDiff::~ASTDiff() {} 880 881 SyntaxTree::SyntaxTree(const ASTContext &AST) 882 : TreeImpl(llvm::make_unique<SyntaxTreeImpl>( 883 this, AST.getTranslationUnitDecl(), AST)) {} 884 885 std::vector<Match> ASTDiff::getMatches() { 886 DiffImpl->computeMapping(); 887 return DiffImpl->getMatches(DiffImpl->TheMapping); 888 } 889 890 std::vector<Change> ASTDiff::getChanges() { 891 DiffImpl->computeMapping(); 892 return DiffImpl->computeChanges(DiffImpl->TheMapping); 893 } 894 895 void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const { 896 DiffImpl->printChangeImpl(OS, Chg); 897 } 898 899 void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const { 900 DiffImpl->printMatchImpl(OS, M); 901 } 902 903 void SyntaxTree::printAsJson(raw_ostream &OS) { TreeImpl->printAsJsonImpl(OS); } 904 905 std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const { 906 return TreeImpl->getNodeValueImpl(DTN); 907 } 908 909 } // end namespace diff 910 } // end namespace clang 911