1 //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains definitons for the AST differencing interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Tooling/ASTDiff/ASTDiff.h"
15 
16 #include "clang/AST/RecursiveASTVisitor.h"
17 #include "clang/Lex/Lexer.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 
20 #include <limits>
21 #include <memory>
22 #include <unordered_set>
23 
24 using namespace llvm;
25 using namespace clang;
26 
27 namespace clang {
28 namespace diff {
29 
30 class ASTDiff::Impl {
31 public:
32   SyntaxTreeImpl &T1, &T2;
33   bool IsMappingDone = false;
34   Mapping TheMapping;
35 
36   Impl(SyntaxTreeImpl &T1, SyntaxTreeImpl &T2, const ComparisonOptions &Options)
37       : T1(T1), T2(T2), Options(Options) {}
38 
39   /// Matches nodes one-by-one based on their similarity.
40   void computeMapping();
41 
42   std::vector<Match> getMatches(Mapping &M);
43 
44   /// Finds an edit script that converts T1 to T2.
45   std::vector<Change> computeChanges(Mapping &M);
46 
47   void printChangeImpl(raw_ostream &OS, const Change &Chg) const;
48   void printMatchImpl(raw_ostream &OS, const Match &M) const;
49 
50   // Returns a mapping of isomorphic subtrees.
51   Mapping matchTopDown() const;
52 
53 private:
54   // Returns true if the two subtrees are identical.
55   bool isomorphic(NodeId Id1, NodeId Id2) const;
56 
57   bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const;
58 
59   // Returns false if the nodes must not be mached.
60   bool isMatchingPossible(NodeId Id1, NodeId Id2) const;
61 
62   // Adds all corresponding subtrees of the two nodes to the mapping.
63   // The two nodes must be isomorphic.
64   void addIsomorphicSubTrees(Mapping &M, NodeId Id1, NodeId Id2) const;
65 
66   // Uses an optimal albeit slow algorithm to compute a mapping between two
67   // subtrees, but only if both have fewer nodes than MaxSize.
68   void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const;
69 
70   // Computes the ratio of common descendants between the two nodes.
71   // Descendants are only considered to be equal when they are mapped in M.
72   double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
73 
74   // Returns the node that has the highest degree of similarity.
75   NodeId findCandidate(const Mapping &M, NodeId Id1) const;
76 
77   // Tries to match any yet unmapped nodes, in a bottom-up fashion.
78   void matchBottomUp(Mapping &M) const;
79 
80   const ComparisonOptions &Options;
81 
82   friend class ZhangShashaMatcher;
83 };
84 
85 template <class T>
86 static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) {
87   if (!N)
88     return true;
89   SourceLocation SLoc = N->getLocStart();
90   return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc);
91 }
92 
93 namespace {
94 /// Counts the number of nodes that will be compared.
95 struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> {
96   int Count = 0;
97   const SyntaxTreeImpl &Root;
98   NodeCountVisitor(const SyntaxTreeImpl &Root) : Root(Root) {}
99   bool TraverseDecl(Decl *D) {
100     if (isNodeExcluded(Root.AST.getSourceManager(), D))
101       return true;
102     ++Count;
103     RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D);
104     return true;
105   }
106   bool TraverseStmt(Stmt *S) {
107     if (isNodeExcluded(Root.AST.getSourceManager(), S))
108       return true;
109     ++Count;
110     RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S);
111     return true;
112   }
113   bool TraverseType(QualType T) { return true; }
114 };
115 } // end anonymous namespace
116 
117 namespace {
118 // Sets Height, Parent and Children for each node.
119 struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {
120   int Id = 0, Depth = 0;
121   NodeId Parent;
122   SyntaxTreeImpl &Root;
123 
124   PreorderVisitor(SyntaxTreeImpl &Root) : Root(Root) {}
125 
126   template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
127     NodeId MyId = Id;
128     Node &N = Root.getMutableNode(MyId);
129     N.Parent = Parent;
130     N.Depth = Depth;
131     N.ASTNode = DynTypedNode::create(*ASTNode);
132     assert(!N.ASTNode.getNodeKind().isNone() &&
133            "Expected nodes to have a valid kind.");
134     if (Parent.isValid()) {
135       Node &P = Root.getMutableNode(Parent);
136       P.Children.push_back(MyId);
137     }
138     Parent = MyId;
139     ++Id;
140     ++Depth;
141     return std::make_tuple(MyId, Root.getNode(MyId).Parent);
142   }
143   void PostTraverse(std::tuple<NodeId, NodeId> State) {
144     NodeId MyId, PreviousParent;
145     std::tie(MyId, PreviousParent) = State;
146     assert(MyId.isValid() && "Expecting to only traverse valid nodes.");
147     Parent = PreviousParent;
148     --Depth;
149     Node &N = Root.getMutableNode(MyId);
150     N.RightMostDescendant = Id;
151     if (N.isLeaf())
152       Root.Leaves.push_back(MyId);
153     N.Height = 1;
154     for (NodeId Child : N.Children)
155       N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height);
156   }
157   bool TraverseDecl(Decl *D) {
158     if (isNodeExcluded(Root.AST.getSourceManager(), D))
159       return true;
160     auto SavedState = PreTraverse(D);
161     RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);
162     PostTraverse(SavedState);
163     return true;
164   }
165   bool TraverseStmt(Stmt *S) {
166     if (isNodeExcluded(Root.AST.getSourceManager(), S))
167       return true;
168     auto SavedState = PreTraverse(S);
169     RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);
170     PostTraverse(SavedState);
171     return true;
172   }
173   bool TraverseType(QualType T) { return true; }
174 };
175 } // end anonymous namespace
176 
177 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, const ASTContext &AST)
178     : SyntaxTreeImpl(Parent, AST.getTranslationUnitDecl(), AST) {}
179 
180 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Decl *N,
181                                const ASTContext &AST)
182     : Parent(Parent), AST(AST) {
183   NodeCountVisitor NodeCounter(*this);
184   NodeCounter.TraverseDecl(N);
185   Nodes.resize(NodeCounter.Count);
186   PreorderVisitor PreorderWalker(*this);
187   PreorderWalker.TraverseDecl(N);
188   initTree();
189 }
190 
191 SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Stmt *N,
192                                const ASTContext &AST)
193     : Parent(Parent), AST(AST) {
194   NodeCountVisitor NodeCounter(*this);
195   NodeCounter.TraverseStmt(N);
196   Nodes.resize(NodeCounter.Count);
197   PreorderVisitor PreorderWalker(*this);
198   PreorderWalker.TraverseStmt(N);
199   initTree();
200 }
201 
202 void SyntaxTreeImpl::initTree() {
203   setLeftMostDescendants();
204   int PostorderId = 0;
205   PostorderIds.resize(getSize());
206   std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {
207     for (NodeId Child : getNode(Id).Children)
208       PostorderTraverse(Child);
209     PostorderIds[Id] = PostorderId;
210     ++PostorderId;
211   };
212   PostorderTraverse(root());
213 }
214 
215 void SyntaxTreeImpl::setLeftMostDescendants() {
216   for (NodeId Leaf : Leaves) {
217     getMutableNode(Leaf).LeftMostDescendant = Leaf;
218     NodeId Parent, Cur = Leaf;
219     while ((Parent = getNode(Cur).Parent).isValid() &&
220            getNode(Parent).Children[0] == Cur) {
221       Cur = Parent;
222       getMutableNode(Cur).LeftMostDescendant = Leaf;
223     }
224   }
225 }
226 
227 static std::vector<NodeId> getSubtreePostorder(const SyntaxTreeImpl &Tree,
228                                                NodeId Root) {
229   std::vector<NodeId> Postorder;
230   std::function<void(NodeId)> Traverse = [&](NodeId Id) {
231     const Node &N = Tree.getNode(Id);
232     for (NodeId Child : N.Children)
233       Traverse(Child);
234     Postorder.push_back(Id);
235   };
236   Traverse(Root);
237   return Postorder;
238 }
239 
240 static std::vector<NodeId> getSubtreeBfs(const SyntaxTreeImpl &Tree,
241                                          NodeId Root) {
242   std::vector<NodeId> Ids;
243   size_t Expanded = 0;
244   Ids.push_back(Root);
245   while (Expanded < Ids.size())
246     for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)
247       Ids.push_back(Child);
248   return Ids;
249 }
250 
251 int SyntaxTreeImpl::getNumberOfDescendants(NodeId Id) const {
252   return getNode(Id).RightMostDescendant - Id + 1;
253 }
254 
255 bool SyntaxTreeImpl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const {
256   NodeId Lower = SubtreeRoot;
257   NodeId Upper = getNode(SubtreeRoot).RightMostDescendant;
258   return Id >= Lower && Id <= Upper;
259 }
260 
261 std::string SyntaxTreeImpl::getNodeValueImpl(NodeId Id) const {
262   return getNodeValueImpl(getNode(Id).ASTNode);
263 }
264 
265 std::string SyntaxTreeImpl::getNodeValueImpl(const DynTypedNode &DTN) const {
266   if (auto *X = DTN.get<BinaryOperator>())
267     return X->getOpcodeStr();
268   if (auto *X = DTN.get<AccessSpecDecl>()) {
269     CharSourceRange Range(X->getSourceRange(), false);
270     return Lexer::getSourceText(Range, AST.getSourceManager(),
271                                 AST.getLangOpts());
272   }
273   if (auto *X = DTN.get<IntegerLiteral>()) {
274     SmallString<256> Str;
275     X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false);
276     return Str.str();
277   }
278   if (auto *X = DTN.get<StringLiteral>())
279     return X->getString();
280   if (auto *X = DTN.get<ValueDecl>())
281     return X->getNameAsString() + "(" + X->getType().getAsString() + ")";
282   if (DTN.get<DeclStmt>() || DTN.get<TranslationUnitDecl>())
283     return "";
284   std::string Value;
285   if (auto *X = DTN.get<DeclRefExpr>()) {
286     if (X->hasQualifier()) {
287       llvm::raw_string_ostream OS(Value);
288       PrintingPolicy PP(AST.getLangOpts());
289       X->getQualifier()->print(OS, PP);
290     }
291     Value += X->getDecl()->getNameAsString();
292     return Value;
293   }
294   if (auto *X = DTN.get<NamedDecl>())
295     Value += X->getNameAsString() + ";";
296   if (auto *X = DTN.get<TypedefNameDecl>())
297     return Value + X->getUnderlyingType().getAsString() + ";";
298   if (DTN.get<NamespaceDecl>())
299     return Value;
300   if (auto *X = DTN.get<TypeDecl>())
301     if (X->getTypeForDecl())
302       Value +=
303           X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";";
304   if (DTN.get<Decl>())
305     return Value;
306   if (DTN.get<Stmt>())
307     return "";
308   llvm_unreachable("Fatal: unhandled AST node.\n");
309 }
310 
311 void SyntaxTreeImpl::printTree() const { printTree(root()); }
312 void SyntaxTreeImpl::printTree(NodeId Root) const {
313   printTree(llvm::outs(), Root);
314 }
315 
316 void SyntaxTreeImpl::printTree(raw_ostream &OS, NodeId Root) const {
317   const Node &N = getNode(Root);
318   for (int I = 0; I < N.Depth; ++I)
319     OS << " ";
320   printNode(OS, Root);
321   OS << "\n";
322   for (NodeId Child : N.Children)
323     printTree(OS, Child);
324 }
325 
326 void SyntaxTreeImpl::printNode(raw_ostream &OS, NodeId Id) const {
327   if (Id.isInvalid()) {
328     OS << "None";
329     return;
330   }
331   OS << getNode(Id).getTypeLabel();
332   if (getNodeValueImpl(Id) != "")
333     OS << ": " << getNodeValueImpl(Id);
334   OS << "(" << PostorderIds[Id] << ")";
335 }
336 
337 void SyntaxTreeImpl::printNodeAsJson(raw_ostream &OS, NodeId Id) const {
338   auto N = getNode(Id);
339   OS << R"({"type":")" << N.getTypeLabel() << R"(")";
340   if (getNodeValueImpl(Id) != "")
341     OS << R"(,"value":")" << getNodeValueImpl(Id) << R"(")";
342   OS << R"(,"children":[)";
343   if (N.Children.size() > 0) {
344     printNodeAsJson(OS, N.Children[0]);
345     for (size_t I = 1, E = N.Children.size(); I < E; ++I) {
346       OS << ",";
347       printNodeAsJson(OS, N.Children[I]);
348     }
349   }
350   OS << "]}";
351 }
352 
353 void SyntaxTreeImpl::printAsJsonImpl(raw_ostream &OS) const {
354   OS << R"({"root":)";
355   printNodeAsJson(OS, root());
356   OS << "}\n";
357 }
358 
359 /// Identifies a node in a subtree by its postorder offset, starting at 1.
360 struct SNodeId {
361   int Id = 0;
362 
363   explicit SNodeId(int Id) : Id(Id) {}
364   explicit SNodeId() = default;
365 
366   operator int() const { return Id; }
367   SNodeId &operator++() { return ++Id, *this; }
368   SNodeId &operator--() { return --Id, *this; }
369   SNodeId operator+(int Other) const { return SNodeId(Id + Other); }
370 };
371 
372 class Subtree {
373 private:
374   /// The parent tree.
375   const SyntaxTreeImpl &Tree;
376   /// Maps SNodeIds to original ids.
377   std::vector<NodeId> RootIds;
378   /// Maps subtree nodes to their leftmost descendants wtihin the subtree.
379   std::vector<SNodeId> LeftMostDescendants;
380 
381 public:
382   std::vector<SNodeId> KeyRoots;
383 
384   Subtree(const SyntaxTreeImpl &Tree, NodeId SubtreeRoot) : Tree(Tree) {
385     RootIds = getSubtreePostorder(Tree, SubtreeRoot);
386     int NumLeaves = setLeftMostDescendants();
387     computeKeyRoots(NumLeaves);
388   }
389   int getSize() const { return RootIds.size(); }
390   NodeId getIdInRoot(SNodeId Id) const {
391     assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
392     return RootIds[Id - 1];
393   }
394   const Node &getNode(SNodeId Id) const {
395     return Tree.getNode(getIdInRoot(Id));
396   }
397   SNodeId getLeftMostDescendant(SNodeId Id) const {
398     assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
399     return LeftMostDescendants[Id - 1];
400   }
401   /// Returns the postorder index of the leftmost descendant in the subtree.
402   NodeId getPostorderOffset() const {
403     return Tree.PostorderIds[getIdInRoot(SNodeId(1))];
404   }
405 
406 private:
407   /// Returns the number of leafs in the subtree.
408   int setLeftMostDescendants() {
409     int NumLeaves = 0;
410     LeftMostDescendants.resize(getSize());
411     for (int I = 0; I < getSize(); ++I) {
412       SNodeId SI(I + 1);
413       const Node &N = getNode(SI);
414       NumLeaves += N.isLeaf();
415       assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() &&
416              "Postorder traversal in subtree should correspond to traversal in "
417              "the root tree by a constant offset.");
418       LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] -
419                                        getPostorderOffset());
420     }
421     return NumLeaves;
422   }
423   void computeKeyRoots(int Leaves) {
424     KeyRoots.resize(Leaves);
425     std::unordered_set<int> Visited;
426     int K = Leaves - 1;
427     for (SNodeId I(getSize()); I > 0; --I) {
428       SNodeId LeftDesc = getLeftMostDescendant(I);
429       if (Visited.count(LeftDesc))
430         continue;
431       assert(K >= 0 && "K should be non-negative");
432       KeyRoots[K] = I;
433       Visited.insert(LeftDesc);
434       --K;
435     }
436   }
437 };
438 
439 /// Implementation of Zhang and Shasha's Algorithm for tree edit distance.
440 /// Computes an optimal mapping between two trees using only insertion,
441 /// deletion and update as edit actions (similar to the Levenshtein distance).
442 class ZhangShashaMatcher {
443   const ASTDiff::Impl &DiffImpl;
444   Subtree S1;
445   Subtree S2;
446   std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;
447 
448 public:
449   ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTreeImpl &T1,
450                      const SyntaxTreeImpl &T2, NodeId Id1, NodeId Id2)
451       : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) {
452     TreeDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
453         size_t(S1.getSize()) + 1);
454     ForestDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
455         size_t(S1.getSize()) + 1);
456     for (int I = 0, E = S1.getSize() + 1; I < E; ++I) {
457       TreeDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
458       ForestDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
459     }
460   }
461 
462   std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() {
463     std::vector<std::pair<NodeId, NodeId>> Matches;
464     std::vector<std::pair<SNodeId, SNodeId>> TreePairs;
465 
466     computeTreeDist();
467 
468     bool RootNodePair = true;
469 
470     TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize()));
471 
472     while (!TreePairs.empty()) {
473       SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
474       std::tie(LastRow, LastCol) = TreePairs.back();
475       TreePairs.pop_back();
476 
477       if (!RootNodePair) {
478         computeForestDist(LastRow, LastCol);
479       }
480 
481       RootNodePair = false;
482 
483       FirstRow = S1.getLeftMostDescendant(LastRow);
484       FirstCol = S2.getLeftMostDescendant(LastCol);
485 
486       Row = LastRow;
487       Col = LastCol;
488 
489       while (Row > FirstRow || Col > FirstCol) {
490         if (Row > FirstRow &&
491             ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
492           --Row;
493         } else if (Col > FirstCol &&
494                    ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
495           --Col;
496         } else {
497           SNodeId LMD1 = S1.getLeftMostDescendant(Row);
498           SNodeId LMD2 = S2.getLeftMostDescendant(Col);
499           if (LMD1 == S1.getLeftMostDescendant(LastRow) &&
500               LMD2 == S2.getLeftMostDescendant(LastCol)) {
501             NodeId Id1 = S1.getIdInRoot(Row);
502             NodeId Id2 = S2.getIdInRoot(Col);
503             assert(DiffImpl.isMatchingPossible(Id1, Id2) &&
504                    "These nodes must not be matched.");
505             Matches.emplace_back(Id1, Id2);
506             --Row;
507             --Col;
508           } else {
509             TreePairs.emplace_back(Row, Col);
510             Row = LMD1;
511             Col = LMD2;
512           }
513         }
514       }
515     }
516     return Matches;
517   }
518 
519 private:
520   /// Simple cost model for edit actions.
521   /// The values range between 0 and 1, or infinity if this edit action should
522   /// always be avoided.
523 
524   /// These costs could be modified to better model the estimated cost of /
525   /// inserting / deleting the current node.
526   static constexpr double DeletionCost = 1;
527   static constexpr double InsertionCost = 1;
528 
529   double getUpdateCost(SNodeId Id1, SNodeId Id2) {
530     const DynTypedNode &DTN1 = S1.getNode(Id1).ASTNode,
531                        &DTN2 = S2.getNode(Id2).ASTNode;
532     if (!DiffImpl.Options.isMatchingAllowed(DTN1, DTN2))
533       return std::numeric_limits<double>::max();
534     return DiffImpl.Options.getNodeDistance(*DiffImpl.T1.Parent, DTN1,
535                                             *DiffImpl.T2.Parent, DTN2);
536   }
537 
538   void computeTreeDist() {
539     for (SNodeId Id1 : S1.KeyRoots)
540       for (SNodeId Id2 : S2.KeyRoots)
541         computeForestDist(Id1, Id2);
542   }
543 
544   void computeForestDist(SNodeId Id1, SNodeId Id2) {
545     assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0.");
546     SNodeId LMD1 = S1.getLeftMostDescendant(Id1);
547     SNodeId LMD2 = S2.getLeftMostDescendant(Id2);
548 
549     ForestDist[LMD1][LMD2] = 0;
550     for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
551       ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
552       for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
553         ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
554         SNodeId DLMD1 = S1.getLeftMostDescendant(D1);
555         SNodeId DLMD2 = S2.getLeftMostDescendant(D2);
556         if (DLMD1 == LMD1 && DLMD2 == LMD2) {
557           double UpdateCost = getUpdateCost(D1, D2);
558           ForestDist[D1][D2] =
559               std::min({ForestDist[D1 - 1][D2] + DeletionCost,
560                         ForestDist[D1][D2 - 1] + InsertionCost,
561                         ForestDist[D1 - 1][D2 - 1] + UpdateCost});
562           TreeDist[D1][D2] = ForestDist[D1][D2];
563         } else {
564           ForestDist[D1][D2] =
565               std::min({ForestDist[D1 - 1][D2] + DeletionCost,
566                         ForestDist[D1][D2 - 1] + InsertionCost,
567                         ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]});
568         }
569       }
570     }
571   }
572 };
573 
574 namespace {
575 // Compares nodes by their depth.
576 struct HeightLess {
577   const SyntaxTreeImpl &Tree;
578   HeightLess(const SyntaxTreeImpl &Tree) : Tree(Tree) {}
579   bool operator()(NodeId Id1, NodeId Id2) const {
580     return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;
581   }
582 };
583 } // end anonymous namespace
584 
585 // Priority queue for nodes, sorted descendingly by their height.
586 class PriorityList {
587   const SyntaxTreeImpl &Tree;
588   HeightLess Cmp;
589   std::vector<NodeId> Container;
590   PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
591 
592 public:
593   PriorityList(const SyntaxTreeImpl &Tree)
594       : Tree(Tree), Cmp(Tree), List(Cmp, Container) {}
595 
596   void push(NodeId id) { List.push(id); }
597 
598   std::vector<NodeId> pop() {
599     int Max = peekMax();
600     std::vector<NodeId> Result;
601     if (Max == 0)
602       return Result;
603     while (peekMax() == Max) {
604       Result.push_back(List.top());
605       List.pop();
606     }
607     // TODO this is here to get a stable output, not a good heuristic
608     std::sort(Result.begin(), Result.end());
609     return Result;
610   }
611   int peekMax() const {
612     if (List.empty())
613       return 0;
614     return Tree.getNode(List.top()).Height;
615   }
616   void open(NodeId Id) {
617     for (NodeId Child : Tree.getNode(Id).Children)
618       push(Child);
619   }
620 };
621 
622 bool ASTDiff::Impl::isomorphic(NodeId Id1, NodeId Id2) const {
623   const Node &N1 = T1.getNode(Id1);
624   const Node &N2 = T2.getNode(Id2);
625   if (N1.Children.size() != N2.Children.size() ||
626       !isMatchingPossible(Id1, Id2) ||
627       Options.getNodeDistance(*T1.Parent, N1.ASTNode, *T2.Parent, N2.ASTNode) !=
628           0)
629     return false;
630   for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)
631     if (!isomorphic(N1.Children[Id], N2.Children[Id]))
632       return false;
633   return true;
634 }
635 
636 bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1,
637                                         NodeId Id2) const {
638   assert(isMatchingPossible(Id1, Id2) &&
639          "Matching must be possible in the first place.");
640   if (M.hasSrcDst(Id1, Id2))
641     return false;
642   if (Options.EnableMatchingWithUnmatchableParents)
643     return true;
644   const Node &N1 = T1.getNode(Id1);
645   const Node &N2 = T2.getNode(Id2);
646   NodeId P1 = N1.Parent;
647   NodeId P2 = N2.Parent;
648   // Only allow matching if parents can be matched.
649   return (P1.isInvalid() && P2.isInvalid()) ||
650          (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2));
651 }
652 
653 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const {
654   return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode,
655                                    T2.getNode(Id2).ASTNode);
656 }
657 
658 void ASTDiff::Impl::addIsomorphicSubTrees(Mapping &M, NodeId Id1,
659                                           NodeId Id2) const {
660   assert(isomorphic(Id1, Id2) && "Can only be called on isomorphic subtrees.");
661   M.link(Id1, Id2);
662   const Node &N1 = T1.getNode(Id1);
663   const Node &N2 = T2.getNode(Id2);
664   for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)
665     addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]);
666 }
667 
668 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
669                                       NodeId Id2) const {
670   if (std::max(T1.getNumberOfDescendants(Id1),
671                T2.getNumberOfDescendants(Id2)) >= Options.MaxSize)
672     return;
673   ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);
674   std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
675   for (const auto Tuple : R) {
676     NodeId Src = Tuple.first;
677     NodeId Dst = Tuple.second;
678     if (canBeAddedToMapping(M, Src, Dst))
679       M.link(Src, Dst);
680   }
681 }
682 
683 double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1,
684                                     NodeId Id2) const {
685   if (Id1.isInvalid() || Id2.isInvalid())
686     return 0.0;
687   int CommonDescendants = 0;
688   const Node &N1 = T1.getNode(Id1);
689   for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id)
690     CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2));
691   return 2.0 * CommonDescendants /
692          (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
693 }
694 
695 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
696   NodeId Candidate;
697   double MaxSimilarity = 0.0;
698   for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) {
699     if (!isMatchingPossible(Id1, Id2))
700       continue;
701     if (M.hasDst(Id2))
702       continue;
703     double Similarity = getSimilarity(M, Id1, Id2);
704     if (Similarity > MaxSimilarity) {
705       MaxSimilarity = Similarity;
706       Candidate = Id2;
707     }
708   }
709   return Candidate;
710 }
711 
712 void ASTDiff::Impl::matchBottomUp(Mapping &M) const {
713   std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.root());
714   for (NodeId Id1 : Postorder) {
715     if (Id1 == T1.root()) {
716       if (isMatchingPossible(T1.root(), T2.root())) {
717         M.link(T1.root(), T2.root());
718         addOptimalMapping(M, T1.root(), T2.root());
719       }
720       break;
721     }
722     const Node &N1 = T1.getNode(Id1);
723     bool Matched = M.hasSrc(Id1);
724     bool MatchedChildren =
725         std::any_of(N1.Children.begin(), N1.Children.end(),
726                     [&](NodeId Child) { return M.hasSrc(Child); });
727     if (Matched || !MatchedChildren)
728       continue;
729     NodeId Id2 = findCandidate(M, Id1);
730     if (Id2.isInvalid() || !canBeAddedToMapping(M, Id1, Id2) ||
731         getSimilarity(M, Id1, Id2) < Options.MinSimilarity)
732       continue;
733     M.link(Id1, Id2);
734     addOptimalMapping(M, Id1, Id2);
735   }
736 }
737 
738 Mapping ASTDiff::Impl::matchTopDown() const {
739   PriorityList L1(T1);
740   PriorityList L2(T2);
741 
742   Mapping M(T1.getSize(), T2.getSize());
743 
744   L1.push(T1.root());
745   L2.push(T2.root());
746 
747   int Max1, Max2;
748   while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) >
749          Options.MinHeight) {
750     if (Max1 > Max2) {
751       for (NodeId Id : L1.pop())
752         L1.open(Id);
753       continue;
754     }
755     if (Max2 > Max1) {
756       for (NodeId Id : L2.pop())
757         L2.open(Id);
758       continue;
759     }
760     std::vector<NodeId> H1, H2;
761     H1 = L1.pop();
762     H2 = L2.pop();
763     for (NodeId Id1 : H1) {
764       for (NodeId Id2 : H2)
765         if (isomorphic(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2))
766           addIsomorphicSubTrees(M, Id1, Id2);
767     }
768     for (NodeId Id1 : H1) {
769       if (!M.hasSrc(Id1))
770         L1.open(Id1);
771     }
772     for (NodeId Id2 : H2) {
773       if (!M.hasDst(Id2))
774         L2.open(Id2);
775     }
776   }
777   return M;
778 }
779 
780 void ASTDiff::Impl::computeMapping() {
781   if (IsMappingDone)
782     return;
783   TheMapping = matchTopDown();
784   matchBottomUp(TheMapping);
785   IsMappingDone = true;
786 }
787 
788 std::vector<Match> ASTDiff::Impl::getMatches(Mapping &M) {
789   std::vector<Match> Matches;
790   for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1)
791     if ((Id2 = M.getDst(Id1)).isValid())
792       Matches.push_back({Id1, Id2});
793   return Matches;
794 }
795 
796 std::vector<Change> ASTDiff::Impl::computeChanges(Mapping &M) {
797   std::vector<Change> Changes;
798   for (NodeId Id2 : getSubtreeBfs(T2, T2.root())) {
799     const Node &N2 = T2.getNode(Id2);
800     NodeId Id1 = M.getSrc(Id2);
801     if (Id1.isValid()) {
802       assert(isMatchingPossible(Id1, Id2) && "Invalid matching.");
803       if (T1.getNodeValueImpl(Id1) != T2.getNodeValueImpl(Id2)) {
804         Changes.emplace_back(Update, Id1, Id2);
805       }
806       continue;
807     }
808     NodeId P2 = N2.Parent;
809     NodeId P1 = M.getSrc(P2);
810     assert(P1.isValid() &&
811            "Parents must be matched for determining the change type.");
812     Node &Parent1 = T1.getMutableNode(P1);
813     const Node &Parent2 = T2.getNode(P2);
814     auto &Siblings1 = Parent1.Children;
815     const auto &Siblings2 = Parent2.Children;
816     size_t Position;
817     for (Position = 0; Position < Siblings2.size(); ++Position)
818       if (Siblings2[Position] == Id2 || Position >= Siblings1.size())
819         break;
820     Changes.emplace_back(Insert, Id2, P2, Position);
821     Node PatchNode;
822     PatchNode.Parent = P1;
823     PatchNode.LeftMostDescendant = N2.LeftMostDescendant;
824     PatchNode.RightMostDescendant = N2.RightMostDescendant;
825     PatchNode.Depth = N2.Depth;
826     PatchNode.ASTNode = N2.ASTNode;
827     // TODO update Depth if needed
828     NodeId PatchNodeId = T1.getSize();
829     // TODO maybe choose a different data structure for Children.
830     Siblings1.insert(Siblings1.begin() + Position, PatchNodeId);
831     T1.addNode(PatchNode);
832     M.link(PatchNodeId, Id2);
833   }
834   for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
835     NodeId Id2 = M.getDst(Id1);
836     if (Id2.isInvalid())
837       Changes.emplace_back(Delete, Id1, Id2);
838   }
839   return Changes;
840 }
841 
842 void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const {
843   switch (Chg.Kind) {
844   case Delete:
845     OS << "Delete ";
846     T1.printNode(OS, Chg.Src);
847     OS << "\n";
848     break;
849   case Update:
850     OS << "Update ";
851     T1.printNode(OS, Chg.Src);
852     OS << " to " << T2.getNodeValueImpl(Chg.Dst) << "\n";
853     break;
854   case Insert:
855     OS << "Insert ";
856     T2.printNode(OS, Chg.Src);
857     OS << " into ";
858     T2.printNode(OS, Chg.Dst);
859     OS << " at " << Chg.Position << "\n";
860     break;
861   case Move:
862     llvm_unreachable("TODO");
863     break;
864   };
865 }
866 
867 void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const {
868   OS << "Match ";
869   T1.printNode(OS, M.Src);
870   OS << " to ";
871   T2.printNode(OS, M.Dst);
872   OS << "\n";
873 }
874 
875 ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2,
876                  const ComparisonOptions &Options)
877     : DiffImpl(llvm::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {}
878 
879 ASTDiff::~ASTDiff() {}
880 
881 SyntaxTree::SyntaxTree(const ASTContext &AST)
882     : TreeImpl(llvm::make_unique<SyntaxTreeImpl>(
883           this, AST.getTranslationUnitDecl(), AST)) {}
884 
885 std::vector<Match> ASTDiff::getMatches() {
886   DiffImpl->computeMapping();
887   return DiffImpl->getMatches(DiffImpl->TheMapping);
888 }
889 
890 std::vector<Change> ASTDiff::getChanges() {
891   DiffImpl->computeMapping();
892   return DiffImpl->computeChanges(DiffImpl->TheMapping);
893 }
894 
895 void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const {
896   DiffImpl->printChangeImpl(OS, Chg);
897 }
898 
899 void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const {
900   DiffImpl->printMatchImpl(OS, M);
901 }
902 
903 void SyntaxTree::printAsJson(raw_ostream &OS) { TreeImpl->printAsJsonImpl(OS); }
904 
905 std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const {
906   return TreeImpl->getNodeValueImpl(DTN);
907 }
908 
909 } // end namespace diff
910 } // end namespace clang
911