1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/BuildTree.h"
9 #include "clang/AST/ASTFwd.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/DeclBase.h"
12 #include "clang/AST/DeclCXX.h"
13 #include "clang/AST/DeclarationName.h"
14 #include "clang/AST/RecursiveASTVisitor.h"
15 #include "clang/AST/Stmt.h"
16 #include "clang/AST/TypeLoc.h"
17 #include "clang/AST/TypeLocVisitor.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/Specifiers.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/Lexer.h"
24 #include "clang/Tooling/Syntax/Nodes.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "clang/Tooling/Syntax/Tree.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/PointerUnion.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/ScopeExit.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/Support/Allocator.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cstddef>
40 #include <map>
41 
42 using namespace clang;
43 
44 LLVM_ATTRIBUTE_UNUSED
45 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
46 
47 namespace {
48 /// Get start location of the Declarator from the TypeLoc.
49 /// E.g.:
50 ///   loc of `(` in `int (a)`
51 ///   loc of `*` in `int *(a)`
52 ///   loc of the first `(` in `int (*a)(int)`
53 ///   loc of the `*` in `int *(a)(int)`
54 ///   loc of the first `*` in `const int *const *volatile a;`
55 ///
56 /// It is non-trivial to get the start location because TypeLocs are stored
57 /// inside out. In the example above `*volatile` is the TypeLoc returned
58 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`
59 /// returns.
60 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
61   SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {
62     auto L = Visit(T.getInnerLoc());
63     if (L.isValid())
64       return L;
65     return T.getLParenLoc();
66   }
67 
68   // Types spelled in the prefix part of the declarator.
69   SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {
70     return HandlePointer(T);
71   }
72 
73   SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {
74     return HandlePointer(T);
75   }
76 
77   SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {
78     return HandlePointer(T);
79   }
80 
81   SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {
82     return HandlePointer(T);
83   }
84 
85   SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {
86     return HandlePointer(T);
87   }
88 
89   // All other cases are not important, as they are either part of declaration
90   // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on
91   // existing declarators (e.g. QualifiedTypeLoc). They cannot start the
92   // declarator themselves, but their underlying type can.
93   SourceLocation VisitTypeLoc(TypeLoc T) {
94     auto N = T.getNextTypeLoc();
95     if (!N)
96       return SourceLocation();
97     return Visit(N);
98   }
99 
100   SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {
101     if (T.getTypePtr()->hasTrailingReturn())
102       return SourceLocation(); // avoid recursing into the suffix of declarator.
103     return VisitTypeLoc(T);
104   }
105 
106 private:
107   template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {
108     auto L = Visit(T.getPointeeLoc());
109     if (L.isValid())
110       return L;
111     return T.getLocalSourceRange().getBegin();
112   }
113 };
114 } // namespace
115 
116 /// Gets the range of declarator as defined by the C++ grammar. E.g.
117 ///     `int a;` -> range of `a`,
118 ///     `int *a;` -> range of `*a`,
119 ///     `int a[10];` -> range of `a[10]`,
120 ///     `int a[1][2][3];` -> range of `a[1][2][3]`,
121 ///     `int *a = nullptr` -> range of `*a = nullptr`.
122 /// FIMXE: \p Name must be a source range, e.g. for `operator+`.
123 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
124                                       SourceLocation Name,
125                                       SourceRange Initializer) {
126   SourceLocation Start = GetStartLoc().Visit(T);
127   SourceLocation End = T.getSourceRange().getEnd();
128   assert(End.isValid());
129   if (Name.isValid()) {
130     if (Start.isInvalid())
131       Start = Name;
132     if (SM.isBeforeInTranslationUnit(End, Name))
133       End = Name;
134   }
135   if (Initializer.isValid()) {
136     auto InitializerEnd = Initializer.getEnd();
137     assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd);
138     End = InitializerEnd;
139   }
140   return SourceRange(Start, End);
141 }
142 
143 namespace {
144 /// All AST hierarchy roots that can be represented as pointers.
145 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>;
146 /// Maintains a mapping from AST to syntax tree nodes. This class will get more
147 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs.
148 /// FIXME: expose this as public API.
149 class ASTToSyntaxMapping {
150 public:
151   void add(ASTPtr From, syntax::Tree *To) {
152     assert(To != nullptr);
153     assert(!From.isNull());
154 
155     bool Added = Nodes.insert({From, To}).second;
156     (void)Added;
157     assert(Added && "mapping added twice");
158   }
159 
160   syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }
161 
162 private:
163   llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes;
164 };
165 } // namespace
166 
167 /// A helper class for constructing the syntax tree while traversing a clang
168 /// AST.
169 ///
170 /// At each point of the traversal we maintain a list of pending nodes.
171 /// Initially all tokens are added as pending nodes. When processing a clang AST
172 /// node, the clients need to:
173 ///   - create a corresponding syntax node,
174 ///   - assign roles to all pending child nodes with 'markChild' and
175 ///     'markChildToken',
176 ///   - replace the child nodes with the new syntax node in the pending list
177 ///     with 'foldNode'.
178 ///
179 /// Note that all children are expected to be processed when building a node.
180 ///
181 /// Call finalize() to finish building the tree and consume the root node.
182 class syntax::TreeBuilder {
183 public:
184   TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
185     for (const auto &T : Arena.tokenBuffer().expandedTokens())
186       LocationToToken.insert({T.location().getRawEncoding(), &T});
187   }
188 
189   llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
190   const SourceManager &sourceManager() const { return Arena.sourceManager(); }
191 
192   /// Populate children for \p New node, assuming it covers tokens from \p
193   /// Range.
194   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
195                 ASTPtr From) {
196     assert(New);
197     Pending.foldChildren(Arena, Range, New);
198     if (From)
199       Mapping.add(From, New);
200   }
201   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
202                 TypeLoc L) {
203     // FIXME: add mapping for TypeLocs
204     foldNode(Range, New, nullptr);
205   }
206 
207   /// Notifies that we should not consume trailing semicolon when computing
208   /// token range of \p D.
209   void noticeDeclWithoutSemicolon(Decl *D);
210 
211   /// Mark the \p Child node with a corresponding \p Role. All marked children
212   /// should be consumed by foldNode.
213   /// When called on expressions (clang::Expr is derived from clang::Stmt),
214   /// wraps expressions into expression statement.
215   void markStmtChild(Stmt *Child, NodeRole Role);
216   /// Should be called for expressions in non-statement position to avoid
217   /// wrapping into expression statement.
218   void markExprChild(Expr *Child, NodeRole Role);
219   /// Set role for a token starting at \p Loc.
220   void markChildToken(SourceLocation Loc, NodeRole R);
221   /// Set role for \p T.
222   void markChildToken(const syntax::Token *T, NodeRole R);
223 
224   /// Set role for \p N.
225   void markChild(syntax::Node *N, NodeRole R);
226   /// Set role for the syntax node matching \p N.
227   void markChild(ASTPtr N, NodeRole R);
228 
229   /// Finish building the tree and consume the root node.
230   syntax::TranslationUnit *finalize() && {
231     auto Tokens = Arena.tokenBuffer().expandedTokens();
232     assert(!Tokens.empty());
233     assert(Tokens.back().kind() == tok::eof);
234 
235     // Build the root of the tree, consuming all the children.
236     Pending.foldChildren(Arena, Tokens.drop_back(),
237                          new (Arena.allocator()) syntax::TranslationUnit);
238 
239     auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
240     TU->assertInvariantsRecursive();
241     return TU;
242   }
243 
244   /// Finds a token starting at \p L. The token must exist if \p L is valid.
245   const syntax::Token *findToken(SourceLocation L) const;
246 
247   /// Finds the syntax tokens corresponding to the \p SourceRange.
248   llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const {
249     assert(Range.isValid());
250     return getRange(Range.getBegin(), Range.getEnd());
251   }
252 
253   /// Finds the syntax tokens corresponding to the passed source locations.
254   /// \p First is the start position of the first token and \p Last is the start
255   /// position of the last token.
256   llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
257                                          SourceLocation Last) const {
258     assert(First.isValid());
259     assert(Last.isValid());
260     assert(First == Last ||
261            Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
262     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
263   }
264 
265   llvm::ArrayRef<syntax::Token>
266   getTemplateRange(const ClassTemplateSpecializationDecl *D) const {
267     auto Tokens = getRange(D->getSourceRange());
268     return maybeAppendSemicolon(Tokens, D);
269   }
270 
271   /// Returns true if \p D is the last declarator in a chain and is thus
272   /// reponsible for creating SimpleDeclaration for the whole chain.
273   template <class T>
274   bool isResponsibleForCreatingDeclaration(const T *D) const {
275     static_assert((std::is_base_of<DeclaratorDecl, T>::value ||
276                    std::is_base_of<TypedefNameDecl, T>::value),
277                   "only DeclaratorDecl and TypedefNameDecl are supported.");
278 
279     const Decl *Next = D->getNextDeclInContext();
280 
281     // There's no next sibling, this one is responsible.
282     if (Next == nullptr) {
283       return true;
284     }
285     const auto *NextT = llvm::dyn_cast<T>(Next);
286 
287     // Next sibling is not the same type, this one is responsible.
288     if (NextT == nullptr) {
289       return true;
290     }
291     // Next sibling doesn't begin at the same loc, it must be a different
292     // declaration, so this declarator is responsible.
293     if (NextT->getBeginLoc() != D->getBeginLoc()) {
294       return true;
295     }
296 
297     // NextT is a member of the same declaration, and we need the last member to
298     // create declaration. This one is not responsible.
299     return false;
300   }
301 
302   llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) {
303     llvm::ArrayRef<clang::syntax::Token> Tokens;
304     // We want to drop the template parameters for specializations.
305     if (const auto *S = llvm::dyn_cast<TagDecl>(D))
306       Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc());
307     else
308       Tokens = getRange(D->getSourceRange());
309     return maybeAppendSemicolon(Tokens, D);
310   }
311 
312   llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
313     return getRange(E->getSourceRange());
314   }
315 
316   /// Find the adjusted range for the statement, consuming the trailing
317   /// semicolon when needed.
318   llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
319     auto Tokens = getRange(S->getSourceRange());
320     if (isa<CompoundStmt>(S))
321       return Tokens;
322 
323     // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
324     // all statements that end with those. Consume this semicolon here.
325     if (Tokens.back().kind() == tok::semi)
326       return Tokens;
327     return withTrailingSemicolon(Tokens);
328   }
329 
330 private:
331   llvm::ArrayRef<syntax::Token>
332   maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens,
333                        const Decl *D) const {
334     if (llvm::isa<NamespaceDecl>(D))
335       return Tokens;
336     if (DeclsWithoutSemicolons.count(D))
337       return Tokens;
338     // FIXME: do not consume trailing semicolon on function definitions.
339     // Most declarations own a semicolon in syntax trees, but not in clang AST.
340     return withTrailingSemicolon(Tokens);
341   }
342 
343   llvm::ArrayRef<syntax::Token>
344   withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
345     assert(!Tokens.empty());
346     assert(Tokens.back().kind() != tok::eof);
347     // We never consume 'eof', so looking at the next token is ok.
348     if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
349       return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
350     return Tokens;
351   }
352 
353   void setRole(syntax::Node *N, NodeRole R) {
354     assert(N->role() == NodeRole::Detached);
355     N->setRole(R);
356   }
357 
358   /// A collection of trees covering the input tokens.
359   /// When created, each tree corresponds to a single token in the file.
360   /// Clients call 'foldChildren' to attach one or more subtrees to a parent
361   /// node and update the list of trees accordingly.
362   ///
363   /// Ensures that added nodes properly nest and cover the whole token stream.
364   struct Forest {
365     Forest(syntax::Arena &A) {
366       assert(!A.tokenBuffer().expandedTokens().empty());
367       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
368       // Create all leaf nodes.
369       // Note that we do not have 'eof' in the tree.
370       for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
371         auto *L = new (A.allocator()) syntax::Leaf(&T);
372         L->Original = true;
373         L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
374         Trees.insert(Trees.end(), {&T, L});
375       }
376     }
377 
378     void assignRole(llvm::ArrayRef<syntax::Token> Range,
379                     syntax::NodeRole Role) {
380       assert(!Range.empty());
381       auto It = Trees.lower_bound(Range.begin());
382       assert(It != Trees.end() && "no node found");
383       assert(It->first == Range.begin() && "no child with the specified range");
384       assert((std::next(It) == Trees.end() ||
385               std::next(It)->first == Range.end()) &&
386              "no child with the specified range");
387       assert(It->second->role() == NodeRole::Detached &&
388              "re-assigning role for a child");
389       It->second->setRole(Role);
390     }
391 
392     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
393     void foldChildren(const syntax::Arena &A,
394                       llvm::ArrayRef<syntax::Token> Tokens,
395                       syntax::Tree *Node) {
396       // Attach children to `Node`.
397       assert(Node->firstChild() == nullptr && "node already has children");
398 
399       auto *FirstToken = Tokens.begin();
400       auto BeginChildren = Trees.lower_bound(FirstToken);
401 
402       assert((BeginChildren == Trees.end() ||
403               BeginChildren->first == FirstToken) &&
404              "fold crosses boundaries of existing subtrees");
405       auto EndChildren = Trees.lower_bound(Tokens.end());
406       assert(
407           (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
408           "fold crosses boundaries of existing subtrees");
409 
410       // We need to go in reverse order, because we can only prepend.
411       for (auto It = EndChildren; It != BeginChildren; --It) {
412         auto *C = std::prev(It)->second;
413         if (C->role() == NodeRole::Detached)
414           C->setRole(NodeRole::Unknown);
415         Node->prependChildLowLevel(C);
416       }
417 
418       // Mark that this node came from the AST and is backed by the source code.
419       Node->Original = true;
420       Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
421 
422       Trees.erase(BeginChildren, EndChildren);
423       Trees.insert({FirstToken, Node});
424     }
425 
426     // EXPECTS: all tokens were consumed and are owned by a single root node.
427     syntax::Node *finalize() && {
428       assert(Trees.size() == 1);
429       auto *Root = Trees.begin()->second;
430       Trees = {};
431       return Root;
432     }
433 
434     std::string str(const syntax::Arena &A) const {
435       std::string R;
436       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
437         unsigned CoveredTokens =
438             It != Trees.end()
439                 ? (std::next(It)->first - It->first)
440                 : A.tokenBuffer().expandedTokens().end() - It->first;
441 
442         R += std::string(llvm::formatv(
443             "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(),
444             It->first->text(A.sourceManager()), CoveredTokens));
445         R += It->second->dump(A);
446       }
447       return R;
448     }
449 
450   private:
451     /// Maps from the start token to a subtree starting at that token.
452     /// Keys in the map are pointers into the array of expanded tokens, so
453     /// pointer order corresponds to the order of preprocessor tokens.
454     std::map<const syntax::Token *, syntax::Node *> Trees;
455   };
456 
457   /// For debugging purposes.
458   std::string str() { return Pending.str(Arena); }
459 
460   syntax::Arena &Arena;
461   /// To quickly find tokens by their start location.
462   llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *>
463       LocationToToken;
464   Forest Pending;
465   llvm::DenseSet<Decl *> DeclsWithoutSemicolons;
466   ASTToSyntaxMapping Mapping;
467 };
468 
469 namespace {
470 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
471 public:
472   explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
473       : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
474 
475   bool shouldTraversePostOrder() const { return true; }
476 
477   bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {
478     return processDeclaratorAndDeclaration(DD);
479   }
480 
481   bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) {
482     return processDeclaratorAndDeclaration(TD);
483   }
484 
485   bool VisitDecl(Decl *D) {
486     assert(!D->isImplicit());
487     Builder.foldNode(Builder.getDeclarationRange(D),
488                      new (allocator()) syntax::UnknownDeclaration(), D);
489     return true;
490   }
491 
492   // RAV does not call WalkUpFrom* on explicit instantiations, so we have to
493   // override Traverse.
494   // FIXME: make RAV call WalkUpFrom* instead.
495   bool
496   TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) {
497     if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C))
498       return false;
499     if (C->isExplicitSpecialization())
500       return true; // we are only interested in explicit instantiations.
501     auto *Declaration =
502         cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));
503     foldExplicitTemplateInstantiation(
504         Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()),
505         Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);
506     return true;
507   }
508 
509   bool WalkUpFromTemplateDecl(TemplateDecl *S) {
510     foldTemplateDeclaration(
511         Builder.getDeclarationRange(S),
512         Builder.findToken(S->getTemplateParameters()->getTemplateLoc()),
513         Builder.getDeclarationRange(S->getTemplatedDecl()), S);
514     return true;
515   }
516 
517   bool WalkUpFromTagDecl(TagDecl *C) {
518     // FIXME: build the ClassSpecifier node.
519     if (!C->isFreeStanding()) {
520       assert(C->getNumTemplateParameterLists() == 0);
521       return true;
522     }
523     handleFreeStandingTagDecl(C);
524     return true;
525   }
526 
527   syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) {
528     assert(C->isFreeStanding());
529     // Class is a declaration specifier and needs a spanning declaration node.
530     auto DeclarationRange = Builder.getDeclarationRange(C);
531     syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration;
532     Builder.foldNode(DeclarationRange, Result, nullptr);
533 
534     // Build TemplateDeclaration nodes if we had template parameters.
535     auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) {
536       const auto *TemplateKW = Builder.findToken(L.getTemplateLoc());
537       auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end());
538       Result =
539           foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr);
540       DeclarationRange = R;
541     };
542     if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C))
543       ConsumeTemplateParameters(*S->getTemplateParameters());
544     for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I)
545       ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1));
546     return Result;
547   }
548 
549   bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
550     // We do not want to call VisitDecl(), the declaration for translation
551     // unit is built by finalize().
552     return true;
553   }
554 
555   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
556     using NodeRole = syntax::NodeRole;
557 
558     Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen);
559     for (auto *Child : S->body())
560       Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
561     Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);
562 
563     Builder.foldNode(Builder.getStmtRange(S),
564                      new (allocator()) syntax::CompoundStatement, S);
565     return true;
566   }
567 
568   // Some statements are not yet handled by syntax trees.
569   bool WalkUpFromStmt(Stmt *S) {
570     Builder.foldNode(Builder.getStmtRange(S),
571                      new (allocator()) syntax::UnknownStatement, S);
572     return true;
573   }
574 
575   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
576     // We override to traverse range initializer as VarDecl.
577     // RAV traverses it as a statement, we produce invalid node kinds in that
578     // case.
579     // FIXME: should do this in RAV instead?
580     if (S->getInit() && !TraverseStmt(S->getInit()))
581       return false;
582     if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
583       return false;
584     if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
585       return false;
586     if (S->getBody() && !TraverseStmt(S->getBody()))
587       return false;
588     return true;
589   }
590 
591   bool TraverseStmt(Stmt *S) {
592     if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
593       // We want to consume the semicolon, make sure SimpleDeclaration does not.
594       for (auto *D : DS->decls())
595         Builder.noticeDeclWithoutSemicolon(D);
596     } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
597       // Do not recurse into subexpressions.
598       // We do not have syntax trees for expressions yet, so we only want to see
599       // the first top-level expression.
600       return WalkUpFromExpr(E->IgnoreImplicit());
601     }
602     return RecursiveASTVisitor::TraverseStmt(S);
603   }
604 
605   // Some expressions are not yet handled by syntax trees.
606   bool WalkUpFromExpr(Expr *E) {
607     assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
608     Builder.foldNode(Builder.getExprRange(E),
609                      new (allocator()) syntax::UnknownExpression, E);
610     return true;
611   }
612 
613   bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {
614     auto Tokens = Builder.getDeclarationRange(S);
615     if (Tokens.front().kind() == tok::coloncolon) {
616       // Handle nested namespace definitions. Those start at '::' token, e.g.
617       // namespace a^::b {}
618       // FIXME: build corresponding nodes for the name of this namespace.
619       return true;
620     }
621     Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S);
622     return true;
623   }
624 
625   bool TraverseParenTypeLoc(ParenTypeLoc L) {
626     // We reverse order of traversal to get the proper syntax structure.
627     if (!WalkUpFromParenTypeLoc(L))
628       return false;
629     return TraverseTypeLoc(L.getInnerLoc());
630   }
631 
632   bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {
633     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
634     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
635     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),
636                      new (allocator()) syntax::ParenDeclarator, L);
637     return true;
638   }
639 
640   // Declarator chunks, they are produced by type locs and some clang::Decls.
641   bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {
642     Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);
643     Builder.markExprChild(L.getSizeExpr(),
644                           syntax::NodeRole::ArraySubscript_sizeExpression);
645     Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);
646     Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),
647                      new (allocator()) syntax::ArraySubscript, L);
648     return true;
649   }
650 
651   bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {
652     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
653     for (auto *P : L.getParams()) {
654       Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter);
655     }
656     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
657     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),
658                      new (allocator()) syntax::ParametersAndQualifiers, L);
659     return true;
660   }
661 
662   bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {
663     if (!L.getTypePtr()->hasTrailingReturn())
664       return WalkUpFromFunctionTypeLoc(L);
665 
666     auto *TrailingReturnTokens = BuildTrailingReturn(L);
667     // Finish building the node for parameters.
668     Builder.markChild(TrailingReturnTokens,
669                       syntax::NodeRole::ParametersAndQualifiers_trailingReturn);
670     return WalkUpFromFunctionTypeLoc(L);
671   }
672 
673   bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
674     auto SR = L.getLocalSourceRange();
675     Builder.foldNode(Builder.getRange(SR),
676                      new (allocator()) syntax::MemberPointer, L);
677     return true;
678   }
679 
680   // The code below is very regular, it could even be generated with some
681   // preprocessor magic. We merely assign roles to the corresponding children
682   // and fold resulting nodes.
683   bool WalkUpFromDeclStmt(DeclStmt *S) {
684     Builder.foldNode(Builder.getStmtRange(S),
685                      new (allocator()) syntax::DeclarationStatement, S);
686     return true;
687   }
688 
689   bool WalkUpFromNullStmt(NullStmt *S) {
690     Builder.foldNode(Builder.getStmtRange(S),
691                      new (allocator()) syntax::EmptyStatement, S);
692     return true;
693   }
694 
695   bool WalkUpFromSwitchStmt(SwitchStmt *S) {
696     Builder.markChildToken(S->getSwitchLoc(),
697                            syntax::NodeRole::IntroducerKeyword);
698     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
699     Builder.foldNode(Builder.getStmtRange(S),
700                      new (allocator()) syntax::SwitchStatement, S);
701     return true;
702   }
703 
704   bool WalkUpFromCaseStmt(CaseStmt *S) {
705     Builder.markChildToken(S->getKeywordLoc(),
706                            syntax::NodeRole::IntroducerKeyword);
707     Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
708     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
709     Builder.foldNode(Builder.getStmtRange(S),
710                      new (allocator()) syntax::CaseStatement, S);
711     return true;
712   }
713 
714   bool WalkUpFromDefaultStmt(DefaultStmt *S) {
715     Builder.markChildToken(S->getKeywordLoc(),
716                            syntax::NodeRole::IntroducerKeyword);
717     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
718     Builder.foldNode(Builder.getStmtRange(S),
719                      new (allocator()) syntax::DefaultStatement, S);
720     return true;
721   }
722 
723   bool WalkUpFromIfStmt(IfStmt *S) {
724     Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword);
725     Builder.markStmtChild(S->getThen(),
726                           syntax::NodeRole::IfStatement_thenStatement);
727     Builder.markChildToken(S->getElseLoc(),
728                            syntax::NodeRole::IfStatement_elseKeyword);
729     Builder.markStmtChild(S->getElse(),
730                           syntax::NodeRole::IfStatement_elseStatement);
731     Builder.foldNode(Builder.getStmtRange(S),
732                      new (allocator()) syntax::IfStatement, S);
733     return true;
734   }
735 
736   bool WalkUpFromForStmt(ForStmt *S) {
737     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
738     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
739     Builder.foldNode(Builder.getStmtRange(S),
740                      new (allocator()) syntax::ForStatement, S);
741     return true;
742   }
743 
744   bool WalkUpFromWhileStmt(WhileStmt *S) {
745     Builder.markChildToken(S->getWhileLoc(),
746                            syntax::NodeRole::IntroducerKeyword);
747     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
748     Builder.foldNode(Builder.getStmtRange(S),
749                      new (allocator()) syntax::WhileStatement, S);
750     return true;
751   }
752 
753   bool WalkUpFromContinueStmt(ContinueStmt *S) {
754     Builder.markChildToken(S->getContinueLoc(),
755                            syntax::NodeRole::IntroducerKeyword);
756     Builder.foldNode(Builder.getStmtRange(S),
757                      new (allocator()) syntax::ContinueStatement, S);
758     return true;
759   }
760 
761   bool WalkUpFromBreakStmt(BreakStmt *S) {
762     Builder.markChildToken(S->getBreakLoc(),
763                            syntax::NodeRole::IntroducerKeyword);
764     Builder.foldNode(Builder.getStmtRange(S),
765                      new (allocator()) syntax::BreakStatement, S);
766     return true;
767   }
768 
769   bool WalkUpFromReturnStmt(ReturnStmt *S) {
770     Builder.markChildToken(S->getReturnLoc(),
771                            syntax::NodeRole::IntroducerKeyword);
772     Builder.markExprChild(S->getRetValue(),
773                           syntax::NodeRole::ReturnStatement_value);
774     Builder.foldNode(Builder.getStmtRange(S),
775                      new (allocator()) syntax::ReturnStatement, S);
776     return true;
777   }
778 
779   bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
780     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
781     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
782     Builder.foldNode(Builder.getStmtRange(S),
783                      new (allocator()) syntax::RangeBasedForStatement, S);
784     return true;
785   }
786 
787   bool WalkUpFromEmptyDecl(EmptyDecl *S) {
788     Builder.foldNode(Builder.getDeclarationRange(S),
789                      new (allocator()) syntax::EmptyDeclaration, S);
790     return true;
791   }
792 
793   bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) {
794     Builder.markExprChild(S->getAssertExpr(),
795                           syntax::NodeRole::StaticAssertDeclaration_condition);
796     Builder.markExprChild(S->getMessage(),
797                           syntax::NodeRole::StaticAssertDeclaration_message);
798     Builder.foldNode(Builder.getDeclarationRange(S),
799                      new (allocator()) syntax::StaticAssertDeclaration, S);
800     return true;
801   }
802 
803   bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {
804     Builder.foldNode(Builder.getDeclarationRange(S),
805                      new (allocator()) syntax::LinkageSpecificationDeclaration,
806                      S);
807     return true;
808   }
809 
810   bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {
811     Builder.foldNode(Builder.getDeclarationRange(S),
812                      new (allocator()) syntax::NamespaceAliasDefinition, S);
813     return true;
814   }
815 
816   bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {
817     Builder.foldNode(Builder.getDeclarationRange(S),
818                      new (allocator()) syntax::UsingNamespaceDirective, S);
819     return true;
820   }
821 
822   bool WalkUpFromUsingDecl(UsingDecl *S) {
823     Builder.foldNode(Builder.getDeclarationRange(S),
824                      new (allocator()) syntax::UsingDeclaration, S);
825     return true;
826   }
827 
828   bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {
829     Builder.foldNode(Builder.getDeclarationRange(S),
830                      new (allocator()) syntax::UsingDeclaration, S);
831     return true;
832   }
833 
834   bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {
835     Builder.foldNode(Builder.getDeclarationRange(S),
836                      new (allocator()) syntax::UsingDeclaration, S);
837     return true;
838   }
839 
840   bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {
841     Builder.foldNode(Builder.getDeclarationRange(S),
842                      new (allocator()) syntax::TypeAliasDeclaration, S);
843     return true;
844   }
845 
846 private:
847   template <class T> SourceLocation getQualifiedNameStart(T *D) {
848     static_assert((std::is_base_of<DeclaratorDecl, T>::value ||
849                    std::is_base_of<TypedefNameDecl, T>::value),
850                   "only DeclaratorDecl and TypedefNameDecl are supported.");
851 
852     auto DN = D->getDeclName();
853     bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();
854     if (IsAnonymous)
855       return SourceLocation();
856 
857     if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) {
858       if (DD->getQualifierLoc()) {
859         return DD->getQualifierLoc().getBeginLoc();
860       }
861     }
862 
863     return D->getLocation();
864   }
865 
866   SourceRange getInitializerRange(Decl *D) {
867     if (auto *V = llvm::dyn_cast<VarDecl>(D)) {
868       auto *I = V->getInit();
869       // Initializers in range-based-for are not part of the declarator
870       if (I && !V->isCXXForRangeDecl())
871         return I->getSourceRange();
872     }
873 
874     return SourceRange();
875   }
876 
877   /// Folds SimpleDeclarator node (if present) and in case this is the last
878   /// declarator in the chain it also folds SimpleDeclaration node.
879   template <class T> bool processDeclaratorAndDeclaration(T *D) {
880     SourceRange Initializer = getInitializerRange(D);
881     auto Range = getDeclaratorRange(Builder.sourceManager(),
882                                     D->getTypeSourceInfo()->getTypeLoc(),
883                                     getQualifiedNameStart(D), Initializer);
884 
885     // There doesn't have to be a declarator (e.g. `void foo(int)` only has
886     // declaration, but no declarator).
887     if (Range.getBegin().isValid()) {
888       auto *N = new (allocator()) syntax::SimpleDeclarator;
889       Builder.foldNode(Builder.getRange(Range), N, nullptr);
890       Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator);
891     }
892 
893     if (Builder.isResponsibleForCreatingDeclaration(D)) {
894       Builder.foldNode(Builder.getDeclarationRange(D),
895                        new (allocator()) syntax::SimpleDeclaration, D);
896     }
897     return true;
898   }
899 
900   /// Returns the range of the built node.
901   syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) {
902     assert(L.getTypePtr()->hasTrailingReturn());
903 
904     auto ReturnedType = L.getReturnLoc();
905     // Build node for the declarator, if any.
906     auto ReturnDeclaratorRange =
907         getDeclaratorRange(this->Builder.sourceManager(), ReturnedType,
908                            /*Name=*/SourceLocation(),
909                            /*Initializer=*/SourceLocation());
910     syntax::SimpleDeclarator *ReturnDeclarator = nullptr;
911     if (ReturnDeclaratorRange.isValid()) {
912       ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator;
913       Builder.foldNode(Builder.getRange(ReturnDeclaratorRange),
914                        ReturnDeclarator, nullptr);
915     }
916 
917     // Build node for trailing return type.
918     auto Return = Builder.getRange(ReturnedType.getSourceRange());
919     const auto *Arrow = Return.begin() - 1;
920     assert(Arrow->kind() == tok::arrow);
921     auto Tokens = llvm::makeArrayRef(Arrow, Return.end());
922     Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow);
923     if (ReturnDeclarator)
924       Builder.markChild(ReturnDeclarator,
925                         syntax::NodeRole::TrailingReturnType_declarator);
926     auto *R = new (allocator()) syntax::TrailingReturnType;
927     Builder.foldNode(Tokens, R, L);
928     return R;
929   }
930 
931   void foldExplicitTemplateInstantiation(
932       ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW,
933       const syntax::Token *TemplateKW,
934       syntax::SimpleDeclaration *InnerDeclaration, Decl *From) {
935     assert(!ExternKW || ExternKW->kind() == tok::kw_extern);
936     assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
937     Builder.markChildToken(
938         ExternKW,
939         syntax::NodeRole::ExplicitTemplateInstantiation_externKeyword);
940     Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
941     Builder.markChild(
942         InnerDeclaration,
943         syntax::NodeRole::ExplicitTemplateInstantiation_declaration);
944     Builder.foldNode(
945         Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From);
946   }
947 
948   syntax::TemplateDeclaration *foldTemplateDeclaration(
949       ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW,
950       ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) {
951     assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
952     Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
953 
954     auto *N = new (allocator()) syntax::TemplateDeclaration;
955     Builder.foldNode(Range, N, From);
956     Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration);
957     return N;
958   }
959 
960   /// A small helper to save some typing.
961   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
962 
963   syntax::TreeBuilder &Builder;
964   const LangOptions &LangOpts;
965 };
966 } // namespace
967 
968 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) {
969   DeclsWithoutSemicolons.insert(D);
970 }
971 
972 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
973   if (Loc.isInvalid())
974     return;
975   Pending.assignRole(*findToken(Loc), Role);
976 }
977 
978 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) {
979   if (!T)
980     return;
981   Pending.assignRole(*T, R);
982 }
983 
984 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) {
985   assert(N);
986   setRole(N, R);
987 }
988 
989 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) {
990   auto *SN = Mapping.find(N);
991   assert(SN != nullptr);
992   setRole(SN, R);
993 }
994 
995 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
996   if (!Child)
997     return;
998 
999   syntax::Tree *ChildNode = Mapping.find(Child);
1000   assert(ChildNode != nullptr);
1001 
1002   // This is an expression in a statement position, consume the trailing
1003   // semicolon and form an 'ExpressionStatement' node.
1004   if (isa<Expr>(Child)) {
1005     setRole(ChildNode, NodeRole::ExpressionStatement_expression);
1006     ChildNode = new (allocator()) syntax::ExpressionStatement;
1007     // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
1008     Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
1009   }
1010   setRole(ChildNode, Role);
1011 }
1012 
1013 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
1014   if (!Child)
1015     return;
1016   Child = Child->IgnoreImplicit();
1017 
1018   syntax::Tree *ChildNode = Mapping.find(Child);
1019   assert(ChildNode != nullptr);
1020   setRole(ChildNode, Role);
1021 }
1022 
1023 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
1024   if (L.isInvalid())
1025     return nullptr;
1026   auto It = LocationToToken.find(L.getRawEncoding());
1027   assert(It != LocationToToken.end());
1028   return It->second;
1029 }
1030 
1031 syntax::TranslationUnit *
1032 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
1033   TreeBuilder Builder(A);
1034   BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
1035   return std::move(Builder).finalize();
1036 }
1037