1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/BuildTree.h"
9 #include "clang/AST/ASTFwd.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/DeclBase.h"
12 #include "clang/AST/DeclCXX.h"
13 #include "clang/AST/DeclarationName.h"
14 #include "clang/AST/RecursiveASTVisitor.h"
15 #include "clang/AST/Stmt.h"
16 #include "clang/AST/TypeLoc.h"
17 #include "clang/AST/TypeLocVisitor.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/Specifiers.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/Lexer.h"
24 #include "clang/Tooling/Syntax/Nodes.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "clang/Tooling/Syntax/Tree.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/PointerUnion.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/ScopeExit.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/Support/Allocator.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cstddef>
40 #include <map>
41 
42 using namespace clang;
43 
44 LLVM_ATTRIBUTE_UNUSED
45 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
46 
47 static SourceLocation getQualifiedNameStart(DeclaratorDecl *D) {
48   auto DN = D->getDeclName();
49   bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();
50   if (IsAnonymous)
51     return SourceLocation();
52   return D->getQualifierLoc() ? D->getQualifierLoc().getBeginLoc()
53                               : D->getLocation();
54 }
55 
56 namespace {
57 /// Get start location of the Declarator from the TypeLoc.
58 /// E.g.:
59 ///   loc of `(` in `int (a)`
60 ///   loc of `*` in `int *(a)`
61 ///   loc of the first `(` in `int (*a)(int)`
62 ///   loc of the `*` in `int *(a)(int)`
63 ///   loc of the first `*` in `const int *const *volatile a;`
64 ///
65 /// It is non-trivial to get the start location because TypeLocs are stored
66 /// inside out. In the example above `*volatile` is the TypeLoc returned
67 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`
68 /// returns.
69 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
70   SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {
71     auto L = Visit(T.getInnerLoc());
72     if (L.isValid())
73       return L;
74     return T.getLParenLoc();
75   }
76 
77   // Types spelled in the prefix part of the declarator.
78   SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {
79     return HandlePointer(T);
80   }
81 
82   SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {
83     return HandlePointer(T);
84   }
85 
86   SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {
87     return HandlePointer(T);
88   }
89 
90   SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {
91     return HandlePointer(T);
92   }
93 
94   SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {
95     return HandlePointer(T);
96   }
97 
98   // All other cases are not important, as they are either part of declaration
99   // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on
100   // existing declarators (e.g. QualifiedTypeLoc). They cannot start the
101   // declarator themselves, but their underlying type can.
102   SourceLocation VisitTypeLoc(TypeLoc T) {
103     auto N = T.getNextTypeLoc();
104     if (!N)
105       return SourceLocation();
106     return Visit(N);
107   }
108 
109   SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {
110     if (T.getTypePtr()->hasTrailingReturn())
111       return SourceLocation(); // avoid recursing into the suffix of declarator.
112     return VisitTypeLoc(T);
113   }
114 
115 private:
116   template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {
117     auto L = Visit(T.getPointeeLoc());
118     if (L.isValid())
119       return L;
120     return T.getLocalSourceRange().getBegin();
121   }
122 };
123 } // namespace
124 
125 /// Gets the range of declarator as defined by the C++ grammar. E.g.
126 ///     `int a;` -> range of `a`,
127 ///     `int *a;` -> range of `*a`,
128 ///     `int a[10];` -> range of `a[10]`,
129 ///     `int a[1][2][3];` -> range of `a[1][2][3]`,
130 ///     `int *a = nullptr` -> range of `*a = nullptr`.
131 /// FIMXE: \p Name must be a source range, e.g. for `operator+`.
132 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
133                                       SourceLocation Name,
134                                       SourceRange Initializer) {
135   SourceLocation Start = GetStartLoc().Visit(T);
136   SourceLocation End = T.getSourceRange().getEnd();
137   assert(End.isValid());
138   if (Name.isValid()) {
139     if (Start.isInvalid())
140       Start = Name;
141     if (SM.isBeforeInTranslationUnit(End, Name))
142       End = Name;
143   }
144   if (Initializer.isValid()) {
145     assert(SM.isBeforeInTranslationUnit(End, Initializer.getEnd()));
146     End = Initializer.getEnd();
147   }
148   return SourceRange(Start, End);
149 }
150 
151 namespace {
152 /// All AST hierarchy roots that can be represented as pointers.
153 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>;
154 /// Maintains a mapping from AST to syntax tree nodes. This class will get more
155 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs.
156 /// FIXME: expose this as public API.
157 class ASTToSyntaxMapping {
158 public:
159   void add(ASTPtr From, syntax::Tree *To) {
160     assert(To != nullptr);
161     assert(!From.isNull());
162 
163     bool Added = Nodes.insert({From, To}).second;
164     (void)Added;
165     assert(Added && "mapping added twice");
166   }
167 
168   syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }
169 
170 private:
171   llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes;
172 };
173 } // namespace
174 
175 /// A helper class for constructing the syntax tree while traversing a clang
176 /// AST.
177 ///
178 /// At each point of the traversal we maintain a list of pending nodes.
179 /// Initially all tokens are added as pending nodes. When processing a clang AST
180 /// node, the clients need to:
181 ///   - create a corresponding syntax node,
182 ///   - assign roles to all pending child nodes with 'markChild' and
183 ///     'markChildToken',
184 ///   - replace the child nodes with the new syntax node in the pending list
185 ///     with 'foldNode'.
186 ///
187 /// Note that all children are expected to be processed when building a node.
188 ///
189 /// Call finalize() to finish building the tree and consume the root node.
190 class syntax::TreeBuilder {
191 public:
192   TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
193     for (const auto &T : Arena.tokenBuffer().expandedTokens())
194       LocationToToken.insert({T.location().getRawEncoding(), &T});
195   }
196 
197   llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
198   const SourceManager &sourceManager() const { return Arena.sourceManager(); }
199 
200   /// Populate children for \p New node, assuming it covers tokens from \p
201   /// Range.
202   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
203                 ASTPtr From) {
204     assert(New);
205     Pending.foldChildren(Arena, Range, New);
206     if (From)
207       Mapping.add(From, New);
208   }
209   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
210                 TypeLoc L) {
211     // FIXME: add mapping for TypeLocs
212     foldNode(Range, New, nullptr);
213   }
214 
215   /// Must be called with the range of each `DeclaratorDecl`. Ensures the
216   /// corresponding declarator nodes are covered by `SimpleDeclaration`.
217   void noticeDeclRange(llvm::ArrayRef<syntax::Token> Range);
218 
219   /// Notifies that we should not consume trailing semicolon when computing
220   /// token range of \p D.
221   void noticeDeclWithoutSemicolon(Decl *D);
222 
223   /// Mark the \p Child node with a corresponding \p Role. All marked children
224   /// should be consumed by foldNode.
225   /// When called on expressions (clang::Expr is derived from clang::Stmt),
226   /// wraps expressions into expression statement.
227   void markStmtChild(Stmt *Child, NodeRole Role);
228   /// Should be called for expressions in non-statement position to avoid
229   /// wrapping into expression statement.
230   void markExprChild(Expr *Child, NodeRole Role);
231   /// Set role for a token starting at \p Loc.
232   void markChildToken(SourceLocation Loc, NodeRole R);
233   /// Set role for \p T.
234   void markChildToken(const syntax::Token *T, NodeRole R);
235 
236   /// Set role for \p N.
237   void markChild(syntax::Node *N, NodeRole R);
238   /// Set role for the syntax node matching \p N.
239   void markChild(ASTPtr N, NodeRole R);
240   /// Set role for the delayed node that spans exactly \p Range.
241   void markDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R);
242   /// Set role for the node that may or may not be delayed. Node must span
243   /// exactly \p Range.
244   void markMaybeDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R);
245 
246   /// Finish building the tree and consume the root node.
247   syntax::TranslationUnit *finalize() && {
248     auto Tokens = Arena.tokenBuffer().expandedTokens();
249     assert(!Tokens.empty());
250     assert(Tokens.back().kind() == tok::eof);
251 
252     // Build the root of the tree, consuming all the children.
253     Pending.foldChildren(Arena, Tokens.drop_back(),
254                          new (Arena.allocator()) syntax::TranslationUnit);
255 
256     auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
257     TU->assertInvariantsRecursive();
258     return TU;
259   }
260 
261   /// Finds a token starting at \p L. The token must exist if \p L is valid.
262   const syntax::Token *findToken(SourceLocation L) const;
263 
264   /// Finds the syntax tokens corresponding to the \p SourceRange.
265   llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const {
266     assert(Range.isValid());
267     return getRange(Range.getBegin(), Range.getEnd());
268   }
269 
270   /// Finds the syntax tokens corresponding to the passed source locations.
271   /// \p First is the start position of the first token and \p Last is the start
272   /// position of the last token.
273   llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
274                                          SourceLocation Last) const {
275     assert(First.isValid());
276     assert(Last.isValid());
277     assert(First == Last ||
278            Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
279     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
280   }
281 
282   llvm::ArrayRef<syntax::Token>
283   getTemplateRange(const ClassTemplateSpecializationDecl *D) const {
284     auto Tokens = getRange(D->getSourceRange());
285     return maybeAppendSemicolon(Tokens, D);
286   }
287 
288   llvm::ArrayRef<syntax::Token> getDeclRange(const Decl *D) const {
289     llvm::ArrayRef<clang::syntax::Token> Tokens;
290     // We want to drop the template parameters for specializations.
291     if (const auto *S = llvm::dyn_cast<TagDecl>(D))
292       Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc());
293     else
294       Tokens = getRange(D->getSourceRange());
295     return maybeAppendSemicolon(Tokens, D);
296   }
297   llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
298     return getRange(E->getSourceRange());
299   }
300   /// Find the adjusted range for the statement, consuming the trailing
301   /// semicolon when needed.
302   llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
303     auto Tokens = getRange(S->getSourceRange());
304     if (isa<CompoundStmt>(S))
305       return Tokens;
306 
307     // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
308     // all statements that end with those. Consume this semicolon here.
309     if (Tokens.back().kind() == tok::semi)
310       return Tokens;
311     return withTrailingSemicolon(Tokens);
312   }
313 
314 private:
315   llvm::ArrayRef<syntax::Token>
316   maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens,
317                        const Decl *D) const {
318     if (llvm::isa<NamespaceDecl>(D))
319       return Tokens;
320     if (DeclsWithoutSemicolons.count(D))
321       return Tokens;
322     // FIXME: do not consume trailing semicolon on function definitions.
323     // Most declarations own a semicolon in syntax trees, but not in clang AST.
324     return withTrailingSemicolon(Tokens);
325   }
326 
327   llvm::ArrayRef<syntax::Token>
328   withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
329     assert(!Tokens.empty());
330     assert(Tokens.back().kind() != tok::eof);
331     // We never consume 'eof', so looking at the next token is ok.
332     if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
333       return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
334     return Tokens;
335   }
336 
337   void setRole(syntax::Node *N, NodeRole R) {
338     assert(N->role() == NodeRole::Detached);
339     N->setRole(R);
340   }
341 
342   /// A collection of trees covering the input tokens.
343   /// When created, each tree corresponds to a single token in the file.
344   /// Clients call 'foldChildren' to attach one or more subtrees to a parent
345   /// node and update the list of trees accordingly.
346   ///
347   /// Ensures that added nodes properly nest and cover the whole token stream.
348   struct Forest {
349     Forest(syntax::Arena &A) {
350       assert(!A.tokenBuffer().expandedTokens().empty());
351       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
352       // Create all leaf nodes.
353       // Note that we do not have 'eof' in the tree.
354       for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
355         auto *L = new (A.allocator()) syntax::Leaf(&T);
356         L->Original = true;
357         L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
358         Trees.insert(Trees.end(), {&T, L});
359       }
360     }
361 
362     ~Forest() { assert(DelayedFolds.empty()); }
363 
364     void assignRoleDelayed(llvm::ArrayRef<syntax::Token> Range,
365                            syntax::NodeRole Role) {
366       auto It = DelayedFolds.find(Range.begin());
367       assert(It != DelayedFolds.end());
368       assert(It->second.End == Range.end());
369       It->second.Role = Role;
370     }
371 
372     void assignRoleMaybeDelayed(llvm::ArrayRef<syntax::Token> Range,
373                                 syntax::NodeRole Role) {
374       auto It = DelayedFolds.find(Range.begin());
375       if (It == DelayedFolds.end())
376         return assignRole(Range, Role);
377       assert(It->second.End == Range.end());
378       It->second.Role = Role;
379     }
380 
381     void assignRole(llvm::ArrayRef<syntax::Token> Range,
382                     syntax::NodeRole Role) {
383       assert(!Range.empty());
384       auto It = Trees.lower_bound(Range.begin());
385       assert(It != Trees.end() && "no node found");
386       assert(It->first == Range.begin() && "no child with the specified range");
387       assert((std::next(It) == Trees.end() ||
388               std::next(It)->first == Range.end()) &&
389              "no child with the specified range");
390       assert(It->second->role() == NodeRole::Detached &&
391              "re-assigning role for a child");
392       It->second->setRole(Role);
393     }
394 
395     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
396     void foldChildren(const syntax::Arena &A,
397                       llvm::ArrayRef<syntax::Token> Tokens,
398                       syntax::Tree *Node) {
399       // Execute delayed folds inside `Tokens`.
400       auto BeginFolds = DelayedFolds.lower_bound(Tokens.begin());
401       auto EndFolds = BeginFolds;
402       for (; EndFolds != DelayedFolds.end() &&
403              EndFolds->second.End <= Tokens.end();
404            ++EndFolds)
405         ;
406       // We go in reverse order to ensure we fold deeper nodes first.
407       for (auto RevIt = EndFolds; RevIt != BeginFolds; --RevIt) {
408         auto It = std::prev(RevIt);
409         foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
410                           It->second.Node);
411       }
412       DelayedFolds.erase(BeginFolds, EndFolds);
413 
414       // Attach children to `Node`.
415       foldChildrenEager(A, Tokens, Node);
416     }
417 
418     /// Schedule a call to `foldChildren` that will only be executed when
419     /// containing node is folded. The range of delayed nodes can be extended by
420     /// calling `extendDelayedFold`. Only one delayed node for each starting
421     /// token is allowed.
422     void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens,
423                              syntax::Tree *Node) {
424       assert(!Tokens.empty());
425       bool Inserted =
426           DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
427               .second;
428       (void)Inserted;
429       assert(Inserted && "Multiple delayed folds start at the same token");
430     }
431 
432     /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends
433     /// its endpoint to `ExtendedRange.end()` and returns true.
434     /// Otherwise, returns false.
435     bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) {
436       assert(!ExtendedRange.empty());
437       auto It = DelayedFolds.find(ExtendedRange.data());
438       if (It == DelayedFolds.end())
439         return false;
440       assert(It->second.End <= ExtendedRange.end());
441       It->second.End = ExtendedRange.end();
442       return true;
443     }
444 
445     // EXPECTS: all tokens were consumed and are owned by a single root node.
446     syntax::Node *finalize() && {
447       assert(Trees.size() == 1);
448       auto *Root = Trees.begin()->second;
449       Trees = {};
450       return Root;
451     }
452 
453     std::string str(const syntax::Arena &A) const {
454       std::string R;
455       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
456         unsigned CoveredTokens =
457             It != Trees.end()
458                 ? (std::next(It)->first - It->first)
459                 : A.tokenBuffer().expandedTokens().end() - It->first;
460 
461         R += std::string(llvm::formatv(
462             "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(),
463             It->first->text(A.sourceManager()), CoveredTokens));
464         R += It->second->dump(A);
465       }
466       return R;
467     }
468 
469   private:
470     /// Implementation detail of `foldChildren`, does acutal folding ignoring
471     /// delayed folds.
472     void foldChildrenEager(const syntax::Arena &A,
473                            llvm::ArrayRef<syntax::Token> Tokens,
474                            syntax::Tree *Node) {
475       assert(Node->firstChild() == nullptr && "node already has children");
476 
477       auto *FirstToken = Tokens.begin();
478       auto BeginChildren = Trees.lower_bound(FirstToken);
479       assert((BeginChildren == Trees.end() ||
480               BeginChildren->first == FirstToken) &&
481              "fold crosses boundaries of existing subtrees");
482       auto EndChildren = Trees.lower_bound(Tokens.end());
483       assert(
484           (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
485           "fold crosses boundaries of existing subtrees");
486 
487       // We need to go in reverse order, because we can only prepend.
488       for (auto It = EndChildren; It != BeginChildren; --It) {
489         auto *C = std::prev(It)->second;
490         if (C->role() == NodeRole::Detached)
491           C->setRole(NodeRole::Unknown);
492         Node->prependChildLowLevel(C);
493       }
494 
495       // Mark that this node came from the AST and is backed by the source code.
496       Node->Original = true;
497       Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
498 
499       Trees.erase(BeginChildren, EndChildren);
500       Trees.insert({FirstToken, Node});
501     }
502 
503     /// Maps from the start token to a subtree starting at that token.
504     /// Keys in the map are pointers into the array of expanded tokens, so
505     /// pointer order corresponds to the order of preprocessor tokens.
506     std::map<const syntax::Token *, syntax::Node *> Trees;
507 
508     /// See documentation of `foldChildrenDelayed` for details.
509     struct DelayedFold {
510       const syntax::Token *End = nullptr;
511       syntax::Tree *Node = nullptr;
512       NodeRole Role = NodeRole::Unknown;
513     };
514     std::map<const syntax::Token *, DelayedFold> DelayedFolds;
515   };
516 
517   /// For debugging purposes.
518   std::string str() { return Pending.str(Arena); }
519 
520   syntax::Arena &Arena;
521   /// To quickly find tokens by their start location.
522   llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *>
523       LocationToToken;
524   Forest Pending;
525   llvm::DenseSet<Decl *> DeclsWithoutSemicolons;
526   ASTToSyntaxMapping Mapping;
527 };
528 
529 namespace {
530 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
531 public:
532   explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
533       : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
534 
535   bool shouldTraversePostOrder() const { return true; }
536 
537   bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {
538     // Ensure declarators are covered by SimpleDeclaration.
539     Builder.noticeDeclRange(Builder.getDeclRange(DD));
540 
541     // Build the declarator node.
542     SourceRange Initializer;
543     if (auto *V = llvm::dyn_cast<VarDecl>(DD)) {
544       auto *I = V->getInit();
545       // Initializers in range-based-for are not part of the declarator
546       if (I && !V->isCXXForRangeDecl())
547         Initializer = I->getSourceRange();
548     }
549     auto Declarator = getDeclaratorRange(
550         Builder.sourceManager(), DD->getTypeSourceInfo()->getTypeLoc(),
551         getQualifiedNameStart(DD), Initializer);
552     if (Declarator.isValid()) {
553       auto *N = new (allocator()) syntax::SimpleDeclarator;
554       Builder.foldNode(Builder.getRange(Declarator), N, DD);
555       Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator);
556     }
557 
558     return true;
559   }
560 
561   bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) {
562     // Ensure declarators are covered by SimpleDeclaration.
563     Builder.noticeDeclRange(Builder.getDeclRange(D));
564 
565     auto R = getDeclaratorRange(
566         Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(),
567         /*Name=*/D->getLocation(), /*Initializer=*/SourceRange());
568     if (R.isValid()) {
569       auto *N = new (allocator()) syntax::SimpleDeclarator;
570       Builder.foldNode(Builder.getRange(R), N, D);
571       Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator);
572     }
573     return true;
574   }
575 
576   bool VisitDecl(Decl *D) {
577     assert(!D->isImplicit());
578     Builder.foldNode(Builder.getDeclRange(D),
579                      new (allocator()) syntax::UnknownDeclaration(), D);
580     return true;
581   }
582 
583   // RAV does not call WalkUpFrom* on explicit instantiations, so we have to
584   // override Traverse.
585   // FIXME: make RAV call WalkUpFrom* instead.
586   bool
587   TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) {
588     if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C))
589       return false;
590     if (C->isExplicitSpecialization())
591       return true; // we are only interested in explicit instantiations.
592     auto *Declaration =
593         cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));
594     foldExplicitTemplateInstantiation(
595         Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()),
596         Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);
597     return true;
598   }
599 
600   bool WalkUpFromTemplateDecl(TemplateDecl *S) {
601     foldTemplateDeclaration(
602         Builder.getDeclRange(S),
603         Builder.findToken(S->getTemplateParameters()->getTemplateLoc()),
604         Builder.getDeclRange(S->getTemplatedDecl()), S);
605     return true;
606   }
607 
608   bool WalkUpFromTagDecl(TagDecl *C) {
609     // FIXME: build the ClassSpecifier node.
610     if (!C->isFreeStanding()) {
611       assert(C->getNumTemplateParameterLists() == 0);
612       return true;
613     }
614     handleFreeStandingTagDecl(C);
615     return true;
616   }
617 
618   syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) {
619     assert(C->isFreeStanding());
620     // Class is a declaration specifier and needs a spanning declaration node.
621     auto DeclarationRange = Builder.getDeclRange(C);
622     syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration;
623     Builder.foldNode(DeclarationRange, Result, nullptr);
624 
625     // Build TemplateDeclaration nodes if we had template parameters.
626     auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) {
627       const auto *TemplateKW = Builder.findToken(L.getTemplateLoc());
628       auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end());
629       Result =
630           foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr);
631       DeclarationRange = R;
632     };
633     if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C))
634       ConsumeTemplateParameters(*S->getTemplateParameters());
635     for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I)
636       ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1));
637     return Result;
638   }
639 
640   bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
641     // We do not want to call VisitDecl(), the declaration for translation
642     // unit is built by finalize().
643     return true;
644   }
645 
646   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
647     using NodeRole = syntax::NodeRole;
648 
649     Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen);
650     for (auto *Child : S->body())
651       Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
652     Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);
653 
654     Builder.foldNode(Builder.getStmtRange(S),
655                      new (allocator()) syntax::CompoundStatement, S);
656     return true;
657   }
658 
659   // Some statements are not yet handled by syntax trees.
660   bool WalkUpFromStmt(Stmt *S) {
661     Builder.foldNode(Builder.getStmtRange(S),
662                      new (allocator()) syntax::UnknownStatement, S);
663     return true;
664   }
665 
666   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
667     // We override to traverse range initializer as VarDecl.
668     // RAV traverses it as a statement, we produce invalid node kinds in that
669     // case.
670     // FIXME: should do this in RAV instead?
671     if (S->getInit() && !TraverseStmt(S->getInit()))
672       return false;
673     if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
674       return false;
675     if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
676       return false;
677     if (S->getBody() && !TraverseStmt(S->getBody()))
678       return false;
679     return true;
680   }
681 
682   bool TraverseStmt(Stmt *S) {
683     if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
684       // We want to consume the semicolon, make sure SimpleDeclaration does not.
685       for (auto *D : DS->decls())
686         Builder.noticeDeclWithoutSemicolon(D);
687     } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
688       // Do not recurse into subexpressions.
689       // We do not have syntax trees for expressions yet, so we only want to see
690       // the first top-level expression.
691       return WalkUpFromExpr(E->IgnoreImplicit());
692     }
693     return RecursiveASTVisitor::TraverseStmt(S);
694   }
695 
696   // Some expressions are not yet handled by syntax trees.
697   bool WalkUpFromExpr(Expr *E) {
698     assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
699     Builder.foldNode(Builder.getExprRange(E),
700                      new (allocator()) syntax::UnknownExpression, E);
701     return true;
702   }
703 
704   bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {
705     auto Tokens = Builder.getDeclRange(S);
706     if (Tokens.front().kind() == tok::coloncolon) {
707       // Handle nested namespace definitions. Those start at '::' token, e.g.
708       // namespace a^::b {}
709       // FIXME: build corresponding nodes for the name of this namespace.
710       return true;
711     }
712     Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S);
713     return true;
714   }
715 
716   bool TraverseParenTypeLoc(ParenTypeLoc L) {
717     // We reverse order of traversal to get the proper syntax structure.
718     if (!WalkUpFromParenTypeLoc(L))
719       return false;
720     return TraverseTypeLoc(L.getInnerLoc());
721   }
722 
723   bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {
724     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
725     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
726     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),
727                      new (allocator()) syntax::ParenDeclarator, L);
728     return true;
729   }
730 
731   // Declarator chunks, they are produced by type locs and some clang::Decls.
732   bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {
733     Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);
734     Builder.markExprChild(L.getSizeExpr(),
735                           syntax::NodeRole::ArraySubscript_sizeExpression);
736     Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);
737     Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),
738                      new (allocator()) syntax::ArraySubscript, L);
739     return true;
740   }
741 
742   bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {
743     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
744     for (auto *P : L.getParams())
745       Builder.markDelayedChild(
746           Builder.getDeclRange(P),
747           syntax::NodeRole::ParametersAndQualifiers_parameter);
748     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
749     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),
750                      new (allocator()) syntax::ParametersAndQualifiers, L);
751     return true;
752   }
753 
754   bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {
755     if (!L.getTypePtr()->hasTrailingReturn())
756       return WalkUpFromFunctionTypeLoc(L);
757 
758     auto TrailingReturnTokens = BuildTrailingReturn(L);
759     // Finish building the node for parameters.
760     Builder.markChild(TrailingReturnTokens,
761                       syntax::NodeRole::ParametersAndQualifiers_trailingReturn);
762     return WalkUpFromFunctionTypeLoc(L);
763   }
764 
765   bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
766     auto SR = L.getLocalSourceRange();
767     Builder.foldNode(Builder.getRange(SR),
768                      new (allocator()) syntax::MemberPointer, L);
769     return true;
770   }
771 
772   // The code below is very regular, it could even be generated with some
773   // preprocessor magic. We merely assign roles to the corresponding children
774   // and fold resulting nodes.
775   bool WalkUpFromDeclStmt(DeclStmt *S) {
776     Builder.foldNode(Builder.getStmtRange(S),
777                      new (allocator()) syntax::DeclarationStatement, S);
778     return true;
779   }
780 
781   bool WalkUpFromNullStmt(NullStmt *S) {
782     Builder.foldNode(Builder.getStmtRange(S),
783                      new (allocator()) syntax::EmptyStatement, S);
784     return true;
785   }
786 
787   bool WalkUpFromSwitchStmt(SwitchStmt *S) {
788     Builder.markChildToken(S->getSwitchLoc(),
789                            syntax::NodeRole::IntroducerKeyword);
790     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
791     Builder.foldNode(Builder.getStmtRange(S),
792                      new (allocator()) syntax::SwitchStatement, S);
793     return true;
794   }
795 
796   bool WalkUpFromCaseStmt(CaseStmt *S) {
797     Builder.markChildToken(S->getKeywordLoc(),
798                            syntax::NodeRole::IntroducerKeyword);
799     Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
800     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
801     Builder.foldNode(Builder.getStmtRange(S),
802                      new (allocator()) syntax::CaseStatement, S);
803     return true;
804   }
805 
806   bool WalkUpFromDefaultStmt(DefaultStmt *S) {
807     Builder.markChildToken(S->getKeywordLoc(),
808                            syntax::NodeRole::IntroducerKeyword);
809     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
810     Builder.foldNode(Builder.getStmtRange(S),
811                      new (allocator()) syntax::DefaultStatement, S);
812     return true;
813   }
814 
815   bool WalkUpFromIfStmt(IfStmt *S) {
816     Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword);
817     Builder.markStmtChild(S->getThen(),
818                           syntax::NodeRole::IfStatement_thenStatement);
819     Builder.markChildToken(S->getElseLoc(),
820                            syntax::NodeRole::IfStatement_elseKeyword);
821     Builder.markStmtChild(S->getElse(),
822                           syntax::NodeRole::IfStatement_elseStatement);
823     Builder.foldNode(Builder.getStmtRange(S),
824                      new (allocator()) syntax::IfStatement, S);
825     return true;
826   }
827 
828   bool WalkUpFromForStmt(ForStmt *S) {
829     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
830     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
831     Builder.foldNode(Builder.getStmtRange(S),
832                      new (allocator()) syntax::ForStatement, S);
833     return true;
834   }
835 
836   bool WalkUpFromWhileStmt(WhileStmt *S) {
837     Builder.markChildToken(S->getWhileLoc(),
838                            syntax::NodeRole::IntroducerKeyword);
839     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
840     Builder.foldNode(Builder.getStmtRange(S),
841                      new (allocator()) syntax::WhileStatement, S);
842     return true;
843   }
844 
845   bool WalkUpFromContinueStmt(ContinueStmt *S) {
846     Builder.markChildToken(S->getContinueLoc(),
847                            syntax::NodeRole::IntroducerKeyword);
848     Builder.foldNode(Builder.getStmtRange(S),
849                      new (allocator()) syntax::ContinueStatement, S);
850     return true;
851   }
852 
853   bool WalkUpFromBreakStmt(BreakStmt *S) {
854     Builder.markChildToken(S->getBreakLoc(),
855                            syntax::NodeRole::IntroducerKeyword);
856     Builder.foldNode(Builder.getStmtRange(S),
857                      new (allocator()) syntax::BreakStatement, S);
858     return true;
859   }
860 
861   bool WalkUpFromReturnStmt(ReturnStmt *S) {
862     Builder.markChildToken(S->getReturnLoc(),
863                            syntax::NodeRole::IntroducerKeyword);
864     Builder.markExprChild(S->getRetValue(),
865                           syntax::NodeRole::ReturnStatement_value);
866     Builder.foldNode(Builder.getStmtRange(S),
867                      new (allocator()) syntax::ReturnStatement, S);
868     return true;
869   }
870 
871   bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
872     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
873     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
874     Builder.foldNode(Builder.getStmtRange(S),
875                      new (allocator()) syntax::RangeBasedForStatement, S);
876     return true;
877   }
878 
879   bool WalkUpFromEmptyDecl(EmptyDecl *S) {
880     Builder.foldNode(Builder.getDeclRange(S),
881                      new (allocator()) syntax::EmptyDeclaration, S);
882     return true;
883   }
884 
885   bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) {
886     Builder.markExprChild(S->getAssertExpr(),
887                           syntax::NodeRole::StaticAssertDeclaration_condition);
888     Builder.markExprChild(S->getMessage(),
889                           syntax::NodeRole::StaticAssertDeclaration_message);
890     Builder.foldNode(Builder.getDeclRange(S),
891                      new (allocator()) syntax::StaticAssertDeclaration, S);
892     return true;
893   }
894 
895   bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {
896     Builder.foldNode(Builder.getDeclRange(S),
897                      new (allocator()) syntax::LinkageSpecificationDeclaration,
898                      S);
899     return true;
900   }
901 
902   bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {
903     Builder.foldNode(Builder.getDeclRange(S),
904                      new (allocator()) syntax::NamespaceAliasDefinition, S);
905     return true;
906   }
907 
908   bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {
909     Builder.foldNode(Builder.getDeclRange(S),
910                      new (allocator()) syntax::UsingNamespaceDirective, S);
911     return true;
912   }
913 
914   bool WalkUpFromUsingDecl(UsingDecl *S) {
915     Builder.foldNode(Builder.getDeclRange(S),
916                      new (allocator()) syntax::UsingDeclaration, S);
917     return true;
918   }
919 
920   bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {
921     Builder.foldNode(Builder.getDeclRange(S),
922                      new (allocator()) syntax::UsingDeclaration, S);
923     return true;
924   }
925 
926   bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {
927     Builder.foldNode(Builder.getDeclRange(S),
928                      new (allocator()) syntax::UsingDeclaration, S);
929     return true;
930   }
931 
932   bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {
933     Builder.foldNode(Builder.getDeclRange(S),
934                      new (allocator()) syntax::TypeAliasDeclaration, S);
935     return true;
936   }
937 
938 private:
939   /// Returns the range of the built node.
940   syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) {
941     assert(L.getTypePtr()->hasTrailingReturn());
942 
943     auto ReturnedType = L.getReturnLoc();
944     // Build node for the declarator, if any.
945     auto ReturnDeclaratorRange =
946         getDeclaratorRange(this->Builder.sourceManager(), ReturnedType,
947                            /*Name=*/SourceLocation(),
948                            /*Initializer=*/SourceLocation());
949     syntax::SimpleDeclarator *ReturnDeclarator = nullptr;
950     if (ReturnDeclaratorRange.isValid()) {
951       ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator;
952       Builder.foldNode(Builder.getRange(ReturnDeclaratorRange),
953                        ReturnDeclarator, nullptr);
954     }
955 
956     // Build node for trailing return type.
957     auto Return = Builder.getRange(ReturnedType.getSourceRange());
958     const auto *Arrow = Return.begin() - 1;
959     assert(Arrow->kind() == tok::arrow);
960     auto Tokens = llvm::makeArrayRef(Arrow, Return.end());
961     Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow);
962     if (ReturnDeclarator)
963       Builder.markChild(ReturnDeclarator,
964                         syntax::NodeRole::TrailingReturnType_declarator);
965     auto *R = new (allocator()) syntax::TrailingReturnType;
966     Builder.foldNode(Tokens, R, nullptr);
967     return R;
968   }
969 
970   void foldExplicitTemplateInstantiation(
971       ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW,
972       const syntax::Token *TemplateKW,
973       syntax::SimpleDeclaration *InnerDeclaration, Decl *From) {
974     assert(!ExternKW || ExternKW->kind() == tok::kw_extern);
975     assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
976     Builder.markChildToken(
977         ExternKW,
978         syntax::NodeRole::ExplicitTemplateInstantiation_externKeyword);
979     Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
980     Builder.markChild(
981         InnerDeclaration,
982         syntax::NodeRole::ExplicitTemplateInstantiation_declaration);
983     Builder.foldNode(
984         Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From);
985   }
986 
987   syntax::TemplateDeclaration *foldTemplateDeclaration(
988       ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW,
989       ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) {
990     assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
991     Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
992     Builder.markMaybeDelayedChild(
993         TemplatedDeclaration,
994         syntax::NodeRole::TemplateDeclaration_declaration);
995 
996     auto *N = new (allocator()) syntax::TemplateDeclaration;
997     Builder.foldNode(Range, N, From);
998     return N;
999   }
1000 
1001   /// A small helper to save some typing.
1002   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
1003 
1004   syntax::TreeBuilder &Builder;
1005   const LangOptions &LangOpts;
1006 };
1007 } // namespace
1008 
1009 void syntax::TreeBuilder::noticeDeclRange(llvm::ArrayRef<syntax::Token> Range) {
1010   if (Pending.extendDelayedFold(Range))
1011     return;
1012   Pending.foldChildrenDelayed(Range,
1013                               new (allocator()) syntax::SimpleDeclaration);
1014 }
1015 
1016 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) {
1017   DeclsWithoutSemicolons.insert(D);
1018 }
1019 
1020 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
1021   if (Loc.isInvalid())
1022     return;
1023   Pending.assignRole(*findToken(Loc), Role);
1024 }
1025 
1026 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) {
1027   if (!T)
1028     return;
1029   Pending.assignRole(*T, R);
1030 }
1031 
1032 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) {
1033   assert(N);
1034   setRole(N, R);
1035 }
1036 
1037 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) {
1038   auto *SN = Mapping.find(N);
1039   assert(SN != nullptr);
1040   setRole(SN, R);
1041 }
1042 
1043 void syntax::TreeBuilder::markDelayedChild(llvm::ArrayRef<syntax::Token> Range,
1044                                            NodeRole R) {
1045   Pending.assignRoleDelayed(Range, R);
1046 }
1047 
1048 void syntax::TreeBuilder::markMaybeDelayedChild(
1049     llvm::ArrayRef<syntax::Token> Range, NodeRole R) {
1050   Pending.assignRoleMaybeDelayed(Range, R);
1051 }
1052 
1053 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
1054   if (!Child)
1055     return;
1056 
1057   syntax::Tree *ChildNode = Mapping.find(Child);
1058   assert(ChildNode != nullptr);
1059 
1060   // This is an expression in a statement position, consume the trailing
1061   // semicolon and form an 'ExpressionStatement' node.
1062   if (isa<Expr>(Child)) {
1063     setRole(ChildNode, NodeRole::ExpressionStatement_expression);
1064     ChildNode = new (allocator()) syntax::ExpressionStatement;
1065     // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
1066     Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
1067   }
1068   setRole(ChildNode, Role);
1069 }
1070 
1071 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
1072   if (!Child)
1073     return;
1074   Child = Child->IgnoreImplicit();
1075 
1076   syntax::Tree *ChildNode = Mapping.find(Child);
1077   assert(ChildNode != nullptr);
1078   setRole(ChildNode, Role);
1079 }
1080 
1081 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
1082   if (L.isInvalid())
1083     return nullptr;
1084   auto It = LocationToToken.find(L.getRawEncoding());
1085   assert(It != LocationToToken.end());
1086   return It->second;
1087 }
1088 
1089 syntax::TranslationUnit *
1090 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
1091   TreeBuilder Builder(A);
1092   BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
1093   return std::move(Builder).finalize();
1094 }
1095