1 //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains definitons for the AST differencing interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Tooling/ASTDiff/ASTDiff.h"
15 
16 #include "clang/AST/RecursiveASTVisitor.h"
17 #include "clang/Lex/Lexer.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 
20 #include <limits>
21 #include <memory>
22 #include <unordered_set>
23 
24 using namespace llvm;
25 using namespace clang;
26 
27 namespace clang {
28 namespace diff {
29 
30 namespace {
31 /// Maps nodes of the left tree to ones on the right, and vice versa.
32 class Mapping {
33 public:
34   Mapping() = default;
35   Mapping(Mapping &&Other) = default;
36   Mapping &operator=(Mapping &&Other) = default;
37   Mapping(int Size1, int Size2) {
38     // Maximum possible size after patching one tree.
39     int Size = Size1 + Size2;
40     SrcToDst = llvm::make_unique<SmallVector<NodeId, 2>[]>(Size);
41     DstToSrc = llvm::make_unique<SmallVector<NodeId, 2>[]>(Size);
42   }
43 
44   void link(NodeId Src, NodeId Dst) {
45     SrcToDst[Src].push_back(Dst);
46     DstToSrc[Dst].push_back(Src);
47   }
48 
49   NodeId getDst(NodeId Src) const {
50     if (hasSrc(Src))
51       return SrcToDst[Src][0];
52     return NodeId();
53   }
54   NodeId getSrc(NodeId Dst) const {
55     if (hasDst(Dst))
56       return DstToSrc[Dst][0];
57     return NodeId();
58   }
59   const SmallVector<NodeId, 2> &getAllDsts(NodeId Src) const {
60     return SrcToDst[Src];
61   }
62   const SmallVector<NodeId, 2> &getAllSrcs(NodeId Dst) const {
63     return DstToSrc[Dst];
64   }
65   bool hasSrc(NodeId Src) const { return !SrcToDst[Src].empty(); }
66   bool hasDst(NodeId Dst) const { return !DstToSrc[Dst].empty(); }
67   bool hasSrcDst(NodeId Src, NodeId Dst) const {
68     for (NodeId DstId : SrcToDst[Src])
69       if (DstId == Dst)
70         return true;
71     for (NodeId SrcId : DstToSrc[Dst])
72       if (SrcId == Src)
73         return true;
74     return false;
75   }
76 
77 private:
78   std::unique_ptr<SmallVector<NodeId, 2>[]> SrcToDst, DstToSrc;
79 };
80 } // end anonymous namespace
81 
82 class ASTDiff::Impl {
83 public:
84   SyntaxTree::Impl &T1, &T2;
85   Mapping TheMapping;
86 
87   Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,
88        const ComparisonOptions &Options);
89 
90   /// Matches nodes one-by-one based on their similarity.
91   void computeMapping();
92 
93   // Compute Change for each node based on similarity.
94   void computeChangeKinds(Mapping &M);
95 
96   NodeId getMapped(const std::unique_ptr<SyntaxTree::Impl> &Tree,
97                    NodeId Id) const {
98     if (&*Tree == &T1)
99       return TheMapping.getDst(Id);
100     assert(&*Tree == &T2 && "Invalid tree.");
101     return TheMapping.getSrc(Id);
102   }
103 
104 private:
105   // Returns true if the two subtrees are identical.
106   bool identical(NodeId Id1, NodeId Id2) const;
107 
108   bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const;
109 
110   // Returns false if the nodes must not be mached.
111   bool isMatchingPossible(NodeId Id1, NodeId Id2) const;
112 
113   // Returns true if the nodes' parents are matched.
114   bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const;
115 
116   // Uses an optimal albeit slow algorithm to compute a mapping between two
117   // subtrees, but only if both have fewer nodes than MaxSize.
118   void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const;
119 
120   // Computes the ratio of common descendants between the two nodes.
121   // Descendants are only considered to be equal when they are mapped in M.
122   double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
123 
124   // Returns the node that has the highest degree of similarity.
125   NodeId findCandidate(const Mapping &M, NodeId Id1) const;
126 
127   // Returns a mapping of identical subtrees.
128   Mapping matchTopDown() const;
129 
130   // Tries to match any yet unmapped nodes, in a bottom-up fashion.
131   void matchBottomUp(Mapping &M) const;
132 
133   const ComparisonOptions &Options;
134 
135   friend class ZhangShashaMatcher;
136 };
137 
138 /// Represents the AST of a TranslationUnit.
139 class SyntaxTree::Impl {
140 public:
141   /// Constructs a tree from the entire translation unit.
142   Impl(SyntaxTree *Parent, const ASTContext &AST);
143   /// Constructs a tree from an AST node.
144   Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST);
145   Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST);
146   template <class T>
147   Impl(SyntaxTree *Parent,
148        typename std::enable_if<std::is_base_of<Stmt, T>::value, T>::type *Node,
149        const ASTContext &AST)
150       : Impl(Parent, dyn_cast<Stmt>(Node), AST) {}
151   template <class T>
152   Impl(SyntaxTree *Parent,
153        typename std::enable_if<std::is_base_of<Decl, T>::value, T>::type *Node,
154        const ASTContext &AST)
155       : Impl(Parent, dyn_cast<Decl>(Node), AST) {}
156 
157   SyntaxTree *Parent;
158   const ASTContext &AST;
159   std::vector<NodeId> Leaves;
160   // Maps preorder indices to postorder ones.
161   std::vector<int> PostorderIds;
162   std::vector<NodeId> NodesBfs;
163 
164   int getSize() const { return Nodes.size(); }
165   NodeId getRootId() const { return 0; }
166   PreorderIterator begin() const { return getRootId(); }
167   PreorderIterator end() const { return getSize(); }
168 
169   const Node &getNode(NodeId Id) const { return Nodes[Id]; }
170   Node &getMutableNode(NodeId Id) { return Nodes[Id]; }
171   bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); }
172   void addNode(Node &N) { Nodes.push_back(N); }
173   int getNumberOfDescendants(NodeId Id) const;
174   bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const;
175   int findPositionInParent(NodeId Id, bool Shifted = false) const;
176 
177   std::string getNodeValue(NodeId Id) const;
178   std::string getNodeValue(const Node &Node) const;
179 
180 private:
181   /// Nodes in preorder.
182   std::vector<Node> Nodes;
183 
184   void initTree();
185   void setLeftMostDescendants();
186 };
187 
188 template <class T>
189 static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) {
190   if (!N)
191     return true;
192   SourceLocation SLoc = N->getLocStart();
193   return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc);
194 }
195 
196 namespace {
197 /// Counts the number of nodes that will be compared.
198 struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> {
199   int Count = 0;
200   const SyntaxTree::Impl &Tree;
201   NodeCountVisitor(const SyntaxTree::Impl &Tree) : Tree(Tree) {}
202   bool TraverseDecl(Decl *D) {
203     if (isNodeExcluded(Tree.AST.getSourceManager(), D))
204       return true;
205     ++Count;
206     RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D);
207     return true;
208   }
209   bool TraverseStmt(Stmt *S) {
210     if (isNodeExcluded(Tree.AST.getSourceManager(), S))
211       return true;
212     ++Count;
213     RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S);
214     return true;
215   }
216   bool TraverseType(QualType T) { return true; }
217 };
218 } // end anonymous namespace
219 
220 namespace {
221 // Sets Height, Parent and Children for each node.
222 struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {
223   int Id = 0, Depth = 0;
224   NodeId Parent;
225   SyntaxTree::Impl &Tree;
226 
227   PreorderVisitor(SyntaxTree::Impl &Tree) : Tree(Tree) {}
228 
229   template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
230     NodeId MyId = Id;
231     Node &N = Tree.getMutableNode(MyId);
232     N.Parent = Parent;
233     N.Depth = Depth;
234     N.ASTNode = DynTypedNode::create(*ASTNode);
235     assert(!N.ASTNode.getNodeKind().isNone() &&
236            "Expected nodes to have a valid kind.");
237     if (Parent.isValid()) {
238       Node &P = Tree.getMutableNode(Parent);
239       P.Children.push_back(MyId);
240     }
241     Parent = MyId;
242     ++Id;
243     ++Depth;
244     return std::make_tuple(MyId, Tree.getNode(MyId).Parent);
245   }
246   void PostTraverse(std::tuple<NodeId, NodeId> State) {
247     NodeId MyId, PreviousParent;
248     std::tie(MyId, PreviousParent) = State;
249     assert(MyId.isValid() && "Expecting to only traverse valid nodes.");
250     Parent = PreviousParent;
251     --Depth;
252     Node &N = Tree.getMutableNode(MyId);
253     N.RightMostDescendant = Id - 1;
254     assert(N.RightMostDescendant >= 0 &&
255            N.RightMostDescendant < Tree.getSize() &&
256            "Rightmost descendant must be a valid tree node.");
257     if (N.isLeaf())
258       Tree.Leaves.push_back(MyId);
259     N.Height = 1;
260     for (NodeId Child : N.Children)
261       N.Height = std::max(N.Height, 1 + Tree.getNode(Child).Height);
262   }
263   bool TraverseDecl(Decl *D) {
264     if (isNodeExcluded(Tree.AST.getSourceManager(), D))
265       return true;
266     auto SavedState = PreTraverse(D);
267     RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);
268     PostTraverse(SavedState);
269     return true;
270   }
271   bool TraverseStmt(Stmt *S) {
272     if (isNodeExcluded(Tree.AST.getSourceManager(), S))
273       return true;
274     auto SavedState = PreTraverse(S);
275     RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);
276     PostTraverse(SavedState);
277     return true;
278   }
279   bool TraverseType(QualType T) { return true; }
280 };
281 } // end anonymous namespace
282 
283 SyntaxTree::Impl::Impl(SyntaxTree *Parent, const ASTContext &AST)
284     : Impl(Parent, AST.getTranslationUnitDecl(), AST) {}
285 
286 SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, const ASTContext &AST)
287     : Parent(Parent), AST(AST) {
288   NodeCountVisitor NodeCounter(*this);
289   NodeCounter.TraverseDecl(N);
290   Nodes.resize(NodeCounter.Count);
291   PreorderVisitor PreorderWalker(*this);
292   PreorderWalker.TraverseDecl(N);
293   initTree();
294 }
295 
296 SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, const ASTContext &AST)
297     : Parent(Parent), AST(AST) {
298   NodeCountVisitor NodeCounter(*this);
299   NodeCounter.TraverseStmt(N);
300   Nodes.resize(NodeCounter.Count);
301   PreorderVisitor PreorderWalker(*this);
302   PreorderWalker.TraverseStmt(N);
303   initTree();
304 }
305 
306 static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree,
307                                                NodeId Root) {
308   std::vector<NodeId> Postorder;
309   std::function<void(NodeId)> Traverse = [&](NodeId Id) {
310     const Node &N = Tree.getNode(Id);
311     for (NodeId Child : N.Children)
312       Traverse(Child);
313     Postorder.push_back(Id);
314   };
315   Traverse(Root);
316   return Postorder;
317 }
318 
319 static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree,
320                                          NodeId Root) {
321   std::vector<NodeId> Ids;
322   size_t Expanded = 0;
323   Ids.push_back(Root);
324   while (Expanded < Ids.size())
325     for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)
326       Ids.push_back(Child);
327   return Ids;
328 }
329 
330 void SyntaxTree::Impl::initTree() {
331   setLeftMostDescendants();
332   int PostorderId = 0;
333   PostorderIds.resize(getSize());
334   std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {
335     for (NodeId Child : getNode(Id).Children)
336       PostorderTraverse(Child);
337     PostorderIds[Id] = PostorderId;
338     ++PostorderId;
339   };
340   PostorderTraverse(getRootId());
341   NodesBfs = getSubtreeBfs(*this, getRootId());
342 }
343 
344 void SyntaxTree::Impl::setLeftMostDescendants() {
345   for (NodeId Leaf : Leaves) {
346     getMutableNode(Leaf).LeftMostDescendant = Leaf;
347     NodeId Parent, Cur = Leaf;
348     while ((Parent = getNode(Cur).Parent).isValid() &&
349            getNode(Parent).Children[0] == Cur) {
350       Cur = Parent;
351       getMutableNode(Cur).LeftMostDescendant = Leaf;
352     }
353   }
354 }
355 
356 int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const {
357   return getNode(Id).RightMostDescendant - Id + 1;
358 }
359 
360 bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const {
361   return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant;
362 }
363 
364 int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const {
365   NodeId Parent = getNode(Id).Parent;
366   if (Parent.isInvalid())
367     return 0;
368   const auto &Siblings = getNode(Parent).Children;
369   int Position = 0;
370   for (size_t I = 0, E = Siblings.size(); I < E; ++I) {
371     if (Shifted)
372       Position += getNode(Siblings[I]).Shift;
373     if (Siblings[I] == Id) {
374       Position += I;
375       return Position;
376     }
377   }
378   llvm_unreachable("Node not found in parent's children.");
379 }
380 
381 std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const {
382   return getNodeValue(getNode(Id));
383 }
384 
385 std::string SyntaxTree::Impl::getNodeValue(const Node &N) const {
386   const DynTypedNode &DTN = N.ASTNode;
387   if (auto *X = DTN.get<BinaryOperator>())
388     return X->getOpcodeStr();
389   if (auto *X = DTN.get<AccessSpecDecl>()) {
390     CharSourceRange Range(X->getSourceRange(), false);
391     return Lexer::getSourceText(Range, AST.getSourceManager(),
392                                 AST.getLangOpts());
393   }
394   if (auto *X = DTN.get<IntegerLiteral>()) {
395     SmallString<256> Str;
396     X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false);
397     return Str.str();
398   }
399   if (auto *X = DTN.get<StringLiteral>())
400     return X->getString();
401   if (auto *X = DTN.get<ValueDecl>())
402     return X->getNameAsString() + "(" + X->getType().getAsString() + ")";
403   if (DTN.get<DeclStmt>() || DTN.get<TranslationUnitDecl>())
404     return "";
405   std::string Value;
406   if (auto *X = DTN.get<DeclRefExpr>()) {
407     if (X->hasQualifier()) {
408       llvm::raw_string_ostream OS(Value);
409       PrintingPolicy PP(AST.getLangOpts());
410       X->getQualifier()->print(OS, PP);
411     }
412     Value += X->getDecl()->getNameAsString();
413     return Value;
414   }
415   if (auto *X = DTN.get<NamedDecl>())
416     Value += X->getNameAsString() + ";";
417   if (auto *X = DTN.get<TypedefNameDecl>())
418     return Value + X->getUnderlyingType().getAsString() + ";";
419   if (DTN.get<NamespaceDecl>())
420     return Value;
421   if (auto *X = DTN.get<TypeDecl>())
422     if (X->getTypeForDecl())
423       Value +=
424           X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";";
425   if (DTN.get<Decl>())
426     return Value;
427   if (DTN.get<Stmt>())
428     return "";
429   llvm_unreachable("Fatal: unhandled AST node.\n");
430 }
431 
432 /// Identifies a node in a subtree by its postorder offset, starting at 1.
433 struct SNodeId {
434   int Id = 0;
435 
436   explicit SNodeId(int Id) : Id(Id) {}
437   explicit SNodeId() = default;
438 
439   operator int() const { return Id; }
440   SNodeId &operator++() { return ++Id, *this; }
441   SNodeId &operator--() { return --Id, *this; }
442   SNodeId operator+(int Other) const { return SNodeId(Id + Other); }
443 };
444 
445 class Subtree {
446 private:
447   /// The parent tree.
448   const SyntaxTree::Impl &Tree;
449   /// Maps SNodeIds to original ids.
450   std::vector<NodeId> RootIds;
451   /// Maps subtree nodes to their leftmost descendants wtihin the subtree.
452   std::vector<SNodeId> LeftMostDescendants;
453 
454 public:
455   std::vector<SNodeId> KeyRoots;
456 
457   Subtree(const SyntaxTree::Impl &Tree, NodeId SubtreeRoot) : Tree(Tree) {
458     RootIds = getSubtreePostorder(Tree, SubtreeRoot);
459     int NumLeaves = setLeftMostDescendants();
460     computeKeyRoots(NumLeaves);
461   }
462   int getSize() const { return RootIds.size(); }
463   NodeId getIdInRoot(SNodeId Id) const {
464     assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
465     return RootIds[Id - 1];
466   }
467   const Node &getNode(SNodeId Id) const {
468     return Tree.getNode(getIdInRoot(Id));
469   }
470   SNodeId getLeftMostDescendant(SNodeId Id) const {
471     assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
472     return LeftMostDescendants[Id - 1];
473   }
474   /// Returns the postorder index of the leftmost descendant in the subtree.
475   NodeId getPostorderOffset() const {
476     return Tree.PostorderIds[getIdInRoot(SNodeId(1))];
477   }
478   std::string getNodeValue(SNodeId Id) const {
479     return Tree.getNodeValue(getIdInRoot(Id));
480   }
481 
482 private:
483   /// Returns the number of leafs in the subtree.
484   int setLeftMostDescendants() {
485     int NumLeaves = 0;
486     LeftMostDescendants.resize(getSize());
487     for (int I = 0; I < getSize(); ++I) {
488       SNodeId SI(I + 1);
489       const Node &N = getNode(SI);
490       NumLeaves += N.isLeaf();
491       assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() &&
492              "Postorder traversal in subtree should correspond to traversal in "
493              "the root tree by a constant offset.");
494       LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] -
495                                        getPostorderOffset());
496     }
497     return NumLeaves;
498   }
499   void computeKeyRoots(int Leaves) {
500     KeyRoots.resize(Leaves);
501     std::unordered_set<int> Visited;
502     int K = Leaves - 1;
503     for (SNodeId I(getSize()); I > 0; --I) {
504       SNodeId LeftDesc = getLeftMostDescendant(I);
505       if (Visited.count(LeftDesc))
506         continue;
507       assert(K >= 0 && "K should be non-negative");
508       KeyRoots[K] = I;
509       Visited.insert(LeftDesc);
510       --K;
511     }
512   }
513 };
514 
515 /// Implementation of Zhang and Shasha's Algorithm for tree edit distance.
516 /// Computes an optimal mapping between two trees using only insertion,
517 /// deletion and update as edit actions (similar to the Levenshtein distance).
518 class ZhangShashaMatcher {
519   const ASTDiff::Impl &DiffImpl;
520   Subtree S1;
521   Subtree S2;
522   std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;
523 
524 public:
525   ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTree::Impl &T1,
526                      const SyntaxTree::Impl &T2, NodeId Id1, NodeId Id2)
527       : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) {
528     TreeDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
529         size_t(S1.getSize()) + 1);
530     ForestDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
531         size_t(S1.getSize()) + 1);
532     for (int I = 0, E = S1.getSize() + 1; I < E; ++I) {
533       TreeDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
534       ForestDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
535     }
536   }
537 
538   std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() {
539     std::vector<std::pair<NodeId, NodeId>> Matches;
540     std::vector<std::pair<SNodeId, SNodeId>> TreePairs;
541 
542     computeTreeDist();
543 
544     bool RootNodePair = true;
545 
546     TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize()));
547 
548     while (!TreePairs.empty()) {
549       SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
550       std::tie(LastRow, LastCol) = TreePairs.back();
551       TreePairs.pop_back();
552 
553       if (!RootNodePair) {
554         computeForestDist(LastRow, LastCol);
555       }
556 
557       RootNodePair = false;
558 
559       FirstRow = S1.getLeftMostDescendant(LastRow);
560       FirstCol = S2.getLeftMostDescendant(LastCol);
561 
562       Row = LastRow;
563       Col = LastCol;
564 
565       while (Row > FirstRow || Col > FirstCol) {
566         if (Row > FirstRow &&
567             ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
568           --Row;
569         } else if (Col > FirstCol &&
570                    ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
571           --Col;
572         } else {
573           SNodeId LMD1 = S1.getLeftMostDescendant(Row);
574           SNodeId LMD2 = S2.getLeftMostDescendant(Col);
575           if (LMD1 == S1.getLeftMostDescendant(LastRow) &&
576               LMD2 == S2.getLeftMostDescendant(LastCol)) {
577             NodeId Id1 = S1.getIdInRoot(Row);
578             NodeId Id2 = S2.getIdInRoot(Col);
579             assert(DiffImpl.isMatchingPossible(Id1, Id2) &&
580                    "These nodes must not be matched.");
581             Matches.emplace_back(Id1, Id2);
582             --Row;
583             --Col;
584           } else {
585             TreePairs.emplace_back(Row, Col);
586             Row = LMD1;
587             Col = LMD2;
588           }
589         }
590       }
591     }
592     return Matches;
593   }
594 
595 private:
596   /// We use a simple cost model for edit actions, which seems good enough.
597   /// Simple cost model for edit actions. This seems to make the matching
598   /// algorithm perform reasonably well.
599   /// The values range between 0 and 1, or infinity if this edit action should
600   /// always be avoided.
601   static constexpr double DeletionCost = 1;
602   static constexpr double InsertionCost = 1;
603 
604   double getUpdateCost(SNodeId Id1, SNodeId Id2) {
605     if (!DiffImpl.isMatchingPossible(S1.getIdInRoot(Id1), S2.getIdInRoot(Id2)))
606       return std::numeric_limits<double>::max();
607     return S1.getNodeValue(Id1) != S2.getNodeValue(Id2);
608   }
609 
610   void computeTreeDist() {
611     for (SNodeId Id1 : S1.KeyRoots)
612       for (SNodeId Id2 : S2.KeyRoots)
613         computeForestDist(Id1, Id2);
614   }
615 
616   void computeForestDist(SNodeId Id1, SNodeId Id2) {
617     assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0.");
618     SNodeId LMD1 = S1.getLeftMostDescendant(Id1);
619     SNodeId LMD2 = S2.getLeftMostDescendant(Id2);
620 
621     ForestDist[LMD1][LMD2] = 0;
622     for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
623       ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
624       for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
625         ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
626         SNodeId DLMD1 = S1.getLeftMostDescendant(D1);
627         SNodeId DLMD2 = S2.getLeftMostDescendant(D2);
628         if (DLMD1 == LMD1 && DLMD2 == LMD2) {
629           double UpdateCost = getUpdateCost(D1, D2);
630           ForestDist[D1][D2] =
631               std::min({ForestDist[D1 - 1][D2] + DeletionCost,
632                         ForestDist[D1][D2 - 1] + InsertionCost,
633                         ForestDist[D1 - 1][D2 - 1] + UpdateCost});
634           TreeDist[D1][D2] = ForestDist[D1][D2];
635         } else {
636           ForestDist[D1][D2] =
637               std::min({ForestDist[D1 - 1][D2] + DeletionCost,
638                         ForestDist[D1][D2 - 1] + InsertionCost,
639                         ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]});
640         }
641       }
642     }
643   }
644 };
645 
646 ast_type_traits::ASTNodeKind Node::getType() const {
647   return ASTNode.getNodeKind();
648 }
649 
650 StringRef Node::getTypeLabel() const { return getType().asStringRef(); }
651 
652 namespace {
653 // Compares nodes by their depth.
654 struct HeightLess {
655   const SyntaxTree::Impl &Tree;
656   HeightLess(const SyntaxTree::Impl &Tree) : Tree(Tree) {}
657   bool operator()(NodeId Id1, NodeId Id2) const {
658     return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;
659   }
660 };
661 } // end anonymous namespace
662 
663 namespace {
664 // Priority queue for nodes, sorted descendingly by their height.
665 class PriorityList {
666   const SyntaxTree::Impl &Tree;
667   HeightLess Cmp;
668   std::vector<NodeId> Container;
669   PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
670 
671 public:
672   PriorityList(const SyntaxTree::Impl &Tree)
673       : Tree(Tree), Cmp(Tree), List(Cmp, Container) {}
674 
675   void push(NodeId id) { List.push(id); }
676 
677   std::vector<NodeId> pop() {
678     int Max = peekMax();
679     std::vector<NodeId> Result;
680     if (Max == 0)
681       return Result;
682     while (peekMax() == Max) {
683       Result.push_back(List.top());
684       List.pop();
685     }
686     // TODO this is here to get a stable output, not a good heuristic
687     std::sort(Result.begin(), Result.end());
688     return Result;
689   }
690   int peekMax() const {
691     if (List.empty())
692       return 0;
693     return Tree.getNode(List.top()).Height;
694   }
695   void open(NodeId Id) {
696     for (NodeId Child : Tree.getNode(Id).Children)
697       push(Child);
698   }
699 };
700 } // end anonymous namespace
701 
702 bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const {
703   const Node &N1 = T1.getNode(Id1);
704   const Node &N2 = T2.getNode(Id2);
705   if (N1.Children.size() != N2.Children.size() ||
706       !isMatchingPossible(Id1, Id2) ||
707       T1.getNodeValue(Id1) != T2.getNodeValue(Id2))
708     return false;
709   for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)
710     if (!identical(N1.Children[Id], N2.Children[Id]))
711       return false;
712   return true;
713 }
714 
715 bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1,
716                                         NodeId Id2) const {
717   assert(isMatchingPossible(Id1, Id2) &&
718          "Matching must be possible in the first place.");
719   if (M.hasSrcDst(Id1, Id2))
720     return false;
721   if (Options.EnableMatchingWithUnmatchableParents)
722     return true;
723   const Node &N1 = T1.getNode(Id1);
724   const Node &N2 = T2.getNode(Id2);
725   NodeId P1 = N1.Parent;
726   NodeId P2 = N2.Parent;
727   // Only allow matching if parents can be matched.
728   return (P1.isInvalid() && P2.isInvalid()) ||
729          (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2));
730 }
731 
732 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const {
733   return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2));
734 }
735 
736 bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1,
737                                     NodeId Id2) const {
738   NodeId P1 = T1.getNode(Id1).Parent;
739   NodeId P2 = T2.getNode(Id2).Parent;
740   return (P1.isInvalid() && P2.isInvalid()) ||
741          (P1.isValid() && P2.isValid() && M.getDst(P1) == P2);
742 }
743 
744 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
745                                       NodeId Id2) const {
746   if (std::max(T1.getNumberOfDescendants(Id1),
747                T2.getNumberOfDescendants(Id2)) >= Options.MaxSize)
748     return;
749   ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);
750   std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
751   for (const auto Tuple : R) {
752     NodeId Src = Tuple.first;
753     NodeId Dst = Tuple.second;
754     if (canBeAddedToMapping(M, Src, Dst))
755       M.link(Src, Dst);
756   }
757 }
758 
759 double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1,
760                                     NodeId Id2) const {
761   if (Id1.isInvalid() || Id2.isInvalid())
762     return 0.0;
763   int CommonDescendants = 0;
764   const Node &N1 = T1.getNode(Id1);
765   for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id)
766     CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2));
767   return 2.0 * CommonDescendants /
768          (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
769 }
770 
771 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
772   NodeId Candidate;
773   double HighestSimilarity = 0.0;
774   for (NodeId Id2 : T2) {
775     if (!isMatchingPossible(Id1, Id2))
776       continue;
777     if (M.hasDst(Id2))
778       continue;
779     double Similarity = getSimilarity(M, Id1, Id2);
780     if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {
781       HighestSimilarity = Similarity;
782       Candidate = Id2;
783     }
784   }
785   return Candidate;
786 }
787 
788 void ASTDiff::Impl::matchBottomUp(Mapping &M) const {
789   std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.getRootId());
790   for (NodeId Id1 : Postorder) {
791     if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId()) &&
792         !M.hasDst(T2.getRootId())) {
793       if (isMatchingPossible(T1.getRootId(), T2.getRootId())) {
794         M.link(T1.getRootId(), T2.getRootId());
795         addOptimalMapping(M, T1.getRootId(), T2.getRootId());
796       }
797       break;
798     }
799     const Node &N1 = T1.getNode(Id1);
800     bool Matched = M.hasSrc(Id1);
801     bool MatchedChildren =
802         std::any_of(N1.Children.begin(), N1.Children.end(),
803                     [&](NodeId Child) { return M.hasSrc(Child); });
804     if (Matched || !MatchedChildren)
805       continue;
806     NodeId Id2 = findCandidate(M, Id1);
807     if (Id2.isValid() && canBeAddedToMapping(M, Id1, Id2)) {
808       M.link(Id1, Id2);
809       addOptimalMapping(M, Id1, Id2);
810     }
811   }
812 }
813 
814 Mapping ASTDiff::Impl::matchTopDown() const {
815   PriorityList L1(T1);
816   PriorityList L2(T2);
817 
818   Mapping M(T1.getSize(), T2.getSize());
819 
820   L1.push(T1.getRootId());
821   L2.push(T2.getRootId());
822 
823   int Max1, Max2;
824   while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) >
825          Options.MinHeight) {
826     if (Max1 > Max2) {
827       for (NodeId Id : L1.pop())
828         L1.open(Id);
829       continue;
830     }
831     if (Max2 > Max1) {
832       for (NodeId Id : L2.pop())
833         L2.open(Id);
834       continue;
835     }
836     std::vector<NodeId> H1, H2;
837     H1 = L1.pop();
838     H2 = L2.pop();
839     for (NodeId Id1 : H1) {
840       for (NodeId Id2 : H2) {
841         if (identical(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2)) {
842           for (int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I)
843             M.link(Id1 + I, Id2 + I);
844         }
845       }
846     }
847     for (NodeId Id1 : H1) {
848       if (!M.hasSrc(Id1))
849         L1.open(Id1);
850     }
851     for (NodeId Id2 : H2) {
852       if (!M.hasDst(Id2))
853         L2.open(Id2);
854     }
855   }
856   return M;
857 }
858 
859 ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,
860                     const ComparisonOptions &Options)
861     : T1(T1), T2(T2), Options(Options) {
862   computeMapping();
863   computeChangeKinds(TheMapping);
864 }
865 
866 void ASTDiff::Impl::computeMapping() {
867   TheMapping = matchTopDown();
868   matchBottomUp(TheMapping);
869 }
870 
871 void ASTDiff::Impl::computeChangeKinds(Mapping &M) {
872   for (NodeId Id1 : T1) {
873     if (!M.hasSrc(Id1)) {
874       T1.getMutableNode(Id1).Change = Delete;
875       T1.getMutableNode(Id1).Shift -= 1;
876     }
877   }
878   for (NodeId Id2 : T2) {
879     if (!M.hasDst(Id2)) {
880       T2.getMutableNode(Id2).Change = Insert;
881       T2.getMutableNode(Id2).Shift -= 1;
882     }
883   }
884   for (NodeId Id1 : T1.NodesBfs) {
885     NodeId Id2 = M.getDst(Id1);
886     if (Id2.isInvalid())
887       continue;
888     if (!haveSameParents(M, Id1, Id2) ||
889         T1.findPositionInParent(Id1, true) !=
890             T2.findPositionInParent(Id2, true)) {
891       T1.getMutableNode(Id1).Shift -= 1;
892       T2.getMutableNode(Id2).Shift -= 1;
893     }
894   }
895   for (NodeId Id2 : T2.NodesBfs) {
896     NodeId Id1 = M.getSrc(Id2);
897     if (Id1.isInvalid())
898       continue;
899     Node &N1 = T1.getMutableNode(Id1);
900     Node &N2 = T2.getMutableNode(Id2);
901     if (Id1.isInvalid())
902       continue;
903     if (!haveSameParents(M, Id1, Id2) ||
904         T1.findPositionInParent(Id1, true) !=
905             T2.findPositionInParent(Id2, true)) {
906       N1.Change = N2.Change = Move;
907     }
908     if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) {
909       N1.Change = N2.Change = (N1.Change == Move ? UpdateMove : Update);
910     }
911   }
912 }
913 
914 ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2,
915                  const ComparisonOptions &Options)
916     : DiffImpl(llvm::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {}
917 
918 ASTDiff::~ASTDiff() = default;
919 
920 NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const {
921   return DiffImpl->getMapped(SourceTree.TreeImpl, Id);
922 }
923 
924 SyntaxTree::SyntaxTree(const ASTContext &AST)
925     : TreeImpl(llvm::make_unique<SyntaxTree::Impl>(
926           this, AST.getTranslationUnitDecl(), AST)) {}
927 
928 SyntaxTree::~SyntaxTree() = default;
929 
930 const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; }
931 
932 const Node &SyntaxTree::getNode(NodeId Id) const {
933   return TreeImpl->getNode(Id);
934 }
935 
936 int SyntaxTree::getSize() const { return TreeImpl->getSize(); }
937 NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); }
938 SyntaxTree::PreorderIterator SyntaxTree::begin() const {
939   return TreeImpl->begin();
940 }
941 SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); }
942 
943 int SyntaxTree::findPositionInParent(NodeId Id) const {
944   return TreeImpl->findPositionInParent(Id);
945 }
946 
947 std::pair<unsigned, unsigned>
948 SyntaxTree::getSourceRangeOffsets(const Node &N) const {
949   const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager();
950   SourceRange Range = N.ASTNode.getSourceRange();
951   SourceLocation BeginLoc = Range.getBegin();
952   SourceLocation EndLoc = Lexer::getLocForEndOfToken(
953       Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts());
954   if (auto *ThisExpr = N.ASTNode.get<CXXThisExpr>()) {
955     if (ThisExpr->isImplicit())
956       EndLoc = BeginLoc;
957   }
958   unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc));
959   unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc));
960   return {Begin, End};
961 }
962 
963 std::string SyntaxTree::getNodeValue(NodeId Id) const {
964   return TreeImpl->getNodeValue(Id);
965 }
966 
967 std::string SyntaxTree::getNodeValue(const Node &N) const {
968   return TreeImpl->getNodeValue(N);
969 }
970 
971 } // end namespace diff
972 } // end namespace clang
973