1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/BuildTree.h"
9 #include "clang/AST/ASTFwd.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/DeclBase.h"
12 #include "clang/AST/DeclCXX.h"
13 #include "clang/AST/DeclarationName.h"
14 #include "clang/AST/RecursiveASTVisitor.h"
15 #include "clang/AST/Stmt.h"
16 #include "clang/AST/TypeLoc.h"
17 #include "clang/AST/TypeLocVisitor.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Lex/Lexer.h"
23 #include "clang/Tooling/Syntax/Nodes.h"
24 #include "clang/Tooling/Syntax/Tokens.h"
25 #include "clang/Tooling/Syntax/Tree.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/ScopeExit.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Allocator.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Compiler.h"
33 #include "llvm/Support/FormatVariadic.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <map>
37 
38 using namespace clang;
39 
40 LLVM_ATTRIBUTE_UNUSED
41 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
42 
43 static SourceLocation getQualifiedNameStart(DeclaratorDecl *D) {
44   auto DN = D->getDeclName();
45   bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();
46   if (IsAnonymous)
47     return SourceLocation();
48   return D->getQualifierLoc() ? D->getQualifierLoc().getBeginLoc()
49                               : D->getLocation();
50 }
51 
52 namespace {
53 /// Get start location of the Declarator from the TypeLoc.
54 /// E.g.:
55 ///   loc of `(` in `int (a)`
56 ///   loc of `*` in `int *(a)`
57 ///   loc of the first `(` in `int (*a)(int)`
58 ///   loc of the `*` in `int *(a)(int)`
59 ///   loc of the first `*` in `const int *const *volatile a;`
60 ///
61 /// It is non-trivial to get the start location because TypeLocs are stored
62 /// inside out. In the example above `*volatile` is the TypeLoc returned
63 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`
64 /// returns.
65 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
66   SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {
67     auto L = Visit(T.getInnerLoc());
68     if (L.isValid())
69       return L;
70     return T.getLParenLoc();
71   }
72 
73   // Types spelled in the prefix part of the declarator.
74   SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {
75     return HandlePointer(T);
76   }
77 
78   SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {
79     return HandlePointer(T);
80   }
81 
82   SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {
83     return HandlePointer(T);
84   }
85 
86   SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {
87     return HandlePointer(T);
88   }
89 
90   SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {
91     return HandlePointer(T);
92   }
93 
94   // All other cases are not important, as they are either part of declaration
95   // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on
96   // existing declarators (e.g. QualifiedTypeLoc). They cannot start the
97   // declarator themselves, but their underlying type can.
98   SourceLocation VisitTypeLoc(TypeLoc T) {
99     auto N = T.getNextTypeLoc();
100     if (!N)
101       return SourceLocation();
102     return Visit(N);
103   }
104 
105   SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {
106     if (T.getTypePtr()->hasTrailingReturn())
107       return SourceLocation(); // avoid recursing into the suffix of declarator.
108     return VisitTypeLoc(T);
109   }
110 
111 private:
112   template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {
113     auto L = Visit(T.getPointeeLoc());
114     if (L.isValid())
115       return L;
116     return T.getLocalSourceRange().getBegin();
117   }
118 };
119 } // namespace
120 
121 /// Gets the range of declarator as defined by the C++ grammar. E.g.
122 ///     `int a;` -> range of `a`,
123 ///     `int *a;` -> range of `*a`,
124 ///     `int a[10];` -> range of `a[10]`,
125 ///     `int a[1][2][3];` -> range of `a[1][2][3]`,
126 ///     `int *a = nullptr` -> range of `*a = nullptr`.
127 /// FIMXE: \p Name must be a source range, e.g. for `operator+`.
128 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
129                                       SourceLocation Name,
130                                       SourceRange Initializer) {
131   SourceLocation Start = GetStartLoc().Visit(T);
132   SourceLocation End = T.getSourceRange().getEnd();
133   assert(End.isValid());
134   if (Name.isValid()) {
135     if (Start.isInvalid())
136       Start = Name;
137     if (SM.isBeforeInTranslationUnit(End, Name))
138       End = Name;
139   }
140   if (Initializer.isValid()) {
141     assert(SM.isBeforeInTranslationUnit(End, Initializer.getEnd()));
142     End = Initializer.getEnd();
143   }
144   return SourceRange(Start, End);
145 }
146 
147 /// A helper class for constructing the syntax tree while traversing a clang
148 /// AST.
149 ///
150 /// At each point of the traversal we maintain a list of pending nodes.
151 /// Initially all tokens are added as pending nodes. When processing a clang AST
152 /// node, the clients need to:
153 ///   - create a corresponding syntax node,
154 ///   - assign roles to all pending child nodes with 'markChild' and
155 ///     'markChildToken',
156 ///   - replace the child nodes with the new syntax node in the pending list
157 ///     with 'foldNode'.
158 ///
159 /// Note that all children are expected to be processed when building a node.
160 ///
161 /// Call finalize() to finish building the tree and consume the root node.
162 class syntax::TreeBuilder {
163 public:
164   TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
165     for (const auto &T : Arena.tokenBuffer().expandedTokens())
166       LocationToToken.insert({T.location().getRawEncoding(), &T});
167   }
168 
169   llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
170   const SourceManager &sourceManager() const { return Arena.sourceManager(); }
171 
172   /// Populate children for \p New node, assuming it covers tokens from \p
173   /// Range.
174   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
175 
176   /// Must be called with the range of each `DeclaratorDecl`. Ensures the
177   /// corresponding declarator nodes are covered by `SimpleDeclaration`.
178   void noticeDeclRange(llvm::ArrayRef<syntax::Token> Range);
179 
180   /// Notifies that we should not consume trailing semicolon when computing
181   /// token range of \p D.
182   void noticeDeclWithoutSemicolon(Decl *D);
183 
184   /// Mark the \p Child node with a corresponding \p Role. All marked children
185   /// should be consumed by foldNode.
186   /// When called on expressions (clang::Expr is derived from clang::Stmt),
187   /// wraps expressions into expression statement.
188   void markStmtChild(Stmt *Child, NodeRole Role);
189   /// Should be called for expressions in non-statement position to avoid
190   /// wrapping into expression statement.
191   void markExprChild(Expr *Child, NodeRole Role);
192 
193   /// Set role for a token starting at \p Loc.
194   void markChildToken(SourceLocation Loc, NodeRole R);
195   /// Set role for \p T.
196   void markChildToken(const syntax::Token *T, NodeRole R);
197 
198   /// Set role for the node that spans exactly \p Range.
199   void markChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R);
200   /// Set role for the delayed node that spans exactly \p Range.
201   void markDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R);
202 
203   /// Finish building the tree and consume the root node.
204   syntax::TranslationUnit *finalize() && {
205     auto Tokens = Arena.tokenBuffer().expandedTokens();
206     assert(!Tokens.empty());
207     assert(Tokens.back().kind() == tok::eof);
208 
209     // Build the root of the tree, consuming all the children.
210     Pending.foldChildren(Arena, Tokens.drop_back(),
211                          new (Arena.allocator()) syntax::TranslationUnit);
212 
213     auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
214     TU->assertInvariantsRecursive();
215     return TU;
216   }
217 
218   /// getRange() finds the syntax tokens corresponding to the passed source
219   /// locations.
220   /// \p First is the start position of the first token and \p Last is the start
221   /// position of the last token.
222   llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
223                                          SourceLocation Last) const {
224     assert(First.isValid());
225     assert(Last.isValid());
226     assert(First == Last ||
227            Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
228     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
229   }
230   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
231     auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc());
232     if (llvm::isa<NamespaceDecl>(D))
233       return Tokens;
234     if (DeclsWithoutSemicolons.count(D))
235       return Tokens;
236     // FIXME: do not consume trailing semicolon on function definitions.
237     // Most declarations own a semicolon in syntax trees, but not in clang AST.
238     return withTrailingSemicolon(Tokens);
239   }
240   llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
241     return getRange(E->getBeginLoc(), E->getEndLoc());
242   }
243   /// Find the adjusted range for the statement, consuming the trailing
244   /// semicolon when needed.
245   llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
246     auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
247     if (isa<CompoundStmt>(S))
248       return Tokens;
249 
250     // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
251     // all statements that end with those. Consume this semicolon here.
252     if (Tokens.back().kind() == tok::semi)
253       return Tokens;
254     return withTrailingSemicolon(Tokens);
255   }
256 
257 private:
258   llvm::ArrayRef<syntax::Token>
259   withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
260     assert(!Tokens.empty());
261     assert(Tokens.back().kind() != tok::eof);
262     // We never consume 'eof', so looking at the next token is ok.
263     if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
264       return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
265     return Tokens;
266   }
267 
268   /// Finds a token starting at \p L. The token must exist.
269   const syntax::Token *findToken(SourceLocation L) const;
270 
271   /// A collection of trees covering the input tokens.
272   /// When created, each tree corresponds to a single token in the file.
273   /// Clients call 'foldChildren' to attach one or more subtrees to a parent
274   /// node and update the list of trees accordingly.
275   ///
276   /// Ensures that added nodes properly nest and cover the whole token stream.
277   struct Forest {
278     Forest(syntax::Arena &A) {
279       assert(!A.tokenBuffer().expandedTokens().empty());
280       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
281       // Create all leaf nodes.
282       // Note that we do not have 'eof' in the tree.
283       for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
284         auto *L = new (A.allocator()) syntax::Leaf(&T);
285         L->Original = true;
286         L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
287         Trees.insert(Trees.end(), {&T, NodeAndRole{L}});
288       }
289     }
290 
291     ~Forest() { assert(DelayedFolds.empty()); }
292 
293     void assignRoleDelayed(llvm::ArrayRef<syntax::Token> Range,
294                            syntax::NodeRole Role) {
295       auto It = DelayedFolds.find(Range.begin());
296       assert(It != DelayedFolds.end());
297       assert(It->second.End == Range.end());
298       It->second.Role = Role;
299     }
300 
301     void assignRole(llvm::ArrayRef<syntax::Token> Range,
302                     syntax::NodeRole Role) {
303       assert(!Range.empty());
304       auto It = Trees.lower_bound(Range.begin());
305       assert(It != Trees.end() && "no node found");
306       assert(It->first == Range.begin() && "no child with the specified range");
307       assert((std::next(It) == Trees.end() ||
308               std::next(It)->first == Range.end()) &&
309              "no child with the specified range");
310       It->second.Role = Role;
311     }
312 
313     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
314     void foldChildren(const syntax::Arena &A,
315                       llvm::ArrayRef<syntax::Token> Tokens,
316                       syntax::Tree *Node) {
317       // Execute delayed folds inside `Tokens`.
318       auto BeginFolds = DelayedFolds.lower_bound(Tokens.begin());
319       auto EndFolds = BeginFolds;
320       for (; EndFolds != DelayedFolds.end() &&
321              EndFolds->second.End <= Tokens.end();
322            ++EndFolds)
323         ;
324       // We go in reverse order to ensure we fold deeper nodes first.
325       for (auto RevIt = EndFolds; RevIt != BeginFolds; --RevIt) {
326         auto It = std::prev(RevIt);
327         foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
328                           It->second.Node);
329       }
330       DelayedFolds.erase(BeginFolds, EndFolds);
331 
332       // Attach children to `Node`.
333       foldChildrenEager(A, Tokens, Node);
334     }
335 
336     /// Schedule a call to `foldChildren` that will only be executed when
337     /// containing node is folded. The range of delayed nodes can be extended by
338     /// calling `extendDelayedFold`. Only one delayed node for each starting
339     /// token is allowed.
340     void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens,
341                              syntax::Tree *Node) {
342       assert(!Tokens.empty());
343       bool Inserted =
344           DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
345               .second;
346       (void)Inserted;
347       assert(Inserted && "Multiple delayed folds start at the same token");
348     }
349 
350     /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends
351     /// its endpoint to `ExtendedRange.end()` and returns true.
352     /// Otherwise, returns false.
353     bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) {
354       assert(!ExtendedRange.empty());
355       auto It = DelayedFolds.find(ExtendedRange.data());
356       if (It == DelayedFolds.end())
357         return false;
358       assert(It->second.End <= ExtendedRange.end());
359       It->second.End = ExtendedRange.end();
360       return true;
361     }
362 
363     // EXPECTS: all tokens were consumed and are owned by a single root node.
364     syntax::Node *finalize() && {
365       assert(Trees.size() == 1);
366       auto *Root = Trees.begin()->second.Node;
367       Trees = {};
368       return Root;
369     }
370 
371     std::string str(const syntax::Arena &A) const {
372       std::string R;
373       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
374         unsigned CoveredTokens =
375             It != Trees.end()
376                 ? (std::next(It)->first - It->first)
377                 : A.tokenBuffer().expandedTokens().end() - It->first;
378 
379         R += std::string(llvm::formatv(
380             "- '{0}' covers '{1}'+{2} tokens\n", It->second.Node->kind(),
381             It->first->text(A.sourceManager()), CoveredTokens));
382         R += It->second.Node->dump(A);
383       }
384       return R;
385     }
386 
387   private:
388     /// Implementation detail of `foldChildren`, does acutal folding ignoring
389     /// delayed folds.
390     void foldChildrenEager(const syntax::Arena &A,
391                            llvm::ArrayRef<syntax::Token> Tokens,
392                            syntax::Tree *Node) {
393       assert(Node->firstChild() == nullptr && "node already has children");
394 
395       auto *FirstToken = Tokens.begin();
396       auto BeginChildren = Trees.lower_bound(FirstToken);
397       assert((BeginChildren == Trees.end() ||
398               BeginChildren->first == FirstToken) &&
399              "fold crosses boundaries of existing subtrees");
400       auto EndChildren = Trees.lower_bound(Tokens.end());
401       assert(
402           (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
403           "fold crosses boundaries of existing subtrees");
404 
405       // We need to go in reverse order, because we can only prepend.
406       for (auto It = EndChildren; It != BeginChildren; --It)
407         Node->prependChildLowLevel(std::prev(It)->second.Node,
408                                    std::prev(It)->second.Role);
409 
410       // Mark that this node came from the AST and is backed by the source code.
411       Node->Original = true;
412       Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
413 
414       Trees.erase(BeginChildren, EndChildren);
415       Trees.insert({FirstToken, NodeAndRole(Node)});
416     }
417     /// A with a role that should be assigned to it when adding to a parent.
418     struct NodeAndRole {
419       explicit NodeAndRole(syntax::Node *Node)
420           : Node(Node), Role(NodeRole::Unknown) {}
421 
422       syntax::Node *Node;
423       NodeRole Role;
424     };
425 
426     /// Maps from the start token to a subtree starting at that token.
427     /// Keys in the map are pointers into the array of expanded tokens, so
428     /// pointer order corresponds to the order of preprocessor tokens.
429     /// FIXME: storing the end tokens is redundant.
430     /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
431     std::map<const syntax::Token *, NodeAndRole> Trees;
432 
433     /// See documentation of `foldChildrenDelayed` for details.
434     struct DelayedFold {
435       const syntax::Token *End = nullptr;
436       syntax::Tree *Node = nullptr;
437       NodeRole Role = NodeRole::Unknown;
438     };
439     std::map<const syntax::Token *, DelayedFold> DelayedFolds;
440   };
441 
442   /// For debugging purposes.
443   std::string str() { return Pending.str(Arena); }
444 
445   syntax::Arena &Arena;
446   /// To quickly find tokens by their start location.
447   llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *>
448       LocationToToken;
449   Forest Pending;
450   llvm::DenseSet<Decl *> DeclsWithoutSemicolons;
451 };
452 
453 namespace {
454 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
455 public:
456   explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
457       : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
458 
459   bool shouldTraversePostOrder() const { return true; }
460 
461   bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {
462     // Ensure declarators are covered by SimpleDeclaration.
463     Builder.noticeDeclRange(Builder.getRange(DD));
464 
465     // Build the declarator node.
466     SourceRange Initializer;
467     if (auto *V = llvm::dyn_cast<VarDecl>(DD)) {
468       auto *I = V->getInit();
469       // Initializers in range-based-for are not part of the declarator
470       if (I && !V->isCXXForRangeDecl())
471         Initializer = I->getSourceRange();
472     }
473     auto Declarator = getDeclaratorRange(
474         Builder.sourceManager(), DD->getTypeSourceInfo()->getTypeLoc(),
475         getQualifiedNameStart(DD), Initializer);
476     if (Declarator.isValid()) {
477       auto Tokens =
478           Builder.getRange(Declarator.getBegin(), Declarator.getEnd());
479       Builder.foldNode(Tokens, new (allocator()) syntax::SimpleDeclarator);
480       Builder.markChild(Tokens, syntax::NodeRole::SimpleDeclaration_declarator);
481     }
482 
483     return true;
484   }
485 
486   bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) {
487     // Ensure declarators are covered by SimpleDeclaration.
488     Builder.noticeDeclRange(Builder.getRange(D));
489 
490     auto R = getDeclaratorRange(
491         Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(),
492         /*Name=*/D->getLocation(), /*Initializer=*/SourceRange());
493     if (R.isValid()) {
494       auto Tokens = Builder.getRange(R.getBegin(), R.getEnd());
495       Builder.foldNode(Tokens, new (allocator()) syntax::SimpleDeclarator);
496       Builder.markChild(Tokens, syntax::NodeRole::SimpleDeclaration_declarator);
497     }
498     return true;
499   }
500 
501   bool VisitDecl(Decl *D) {
502     assert(!D->isImplicit());
503     Builder.foldNode(Builder.getRange(D),
504                      new (allocator()) syntax::UnknownDeclaration());
505     return true;
506   }
507 
508   bool WalkUpFromTagDecl(TagDecl *C) {
509     // FIXME: build the ClassSpecifier node.
510     if (C->isFreeStanding()) {
511       // Class is a declaration specifier and needs a spanning declaration node.
512       Builder.foldNode(Builder.getRange(C),
513                        new (allocator()) syntax::SimpleDeclaration);
514       return true;
515     }
516     return true;
517   }
518 
519   bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
520     // We do not want to call VisitDecl(), the declaration for translation
521     // unit is built by finalize().
522     return true;
523   }
524 
525   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
526     using NodeRole = syntax::NodeRole;
527 
528     Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen);
529     for (auto *Child : S->body())
530       Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
531     Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);
532 
533     Builder.foldNode(Builder.getStmtRange(S),
534                      new (allocator()) syntax::CompoundStatement);
535     return true;
536   }
537 
538   // Some statements are not yet handled by syntax trees.
539   bool WalkUpFromStmt(Stmt *S) {
540     Builder.foldNode(Builder.getStmtRange(S),
541                      new (allocator()) syntax::UnknownStatement);
542     return true;
543   }
544 
545   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
546     // We override to traverse range initializer as VarDecl.
547     // RAV traverses it as a statement, we produce invalid node kinds in that
548     // case.
549     // FIXME: should do this in RAV instead?
550     if (S->getInit() && !TraverseStmt(S->getInit()))
551       return false;
552     if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
553       return false;
554     if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
555       return false;
556     if (S->getBody() && !TraverseStmt(S->getBody()))
557       return false;
558     return true;
559   }
560 
561   bool TraverseStmt(Stmt *S) {
562     if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
563       // We want to consume the semicolon, make sure SimpleDeclaration does not.
564       for (auto *D : DS->decls())
565         Builder.noticeDeclWithoutSemicolon(D);
566     } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
567       // Do not recurse into subexpressions.
568       // We do not have syntax trees for expressions yet, so we only want to see
569       // the first top-level expression.
570       return WalkUpFromExpr(E->IgnoreImplicit());
571     }
572     return RecursiveASTVisitor::TraverseStmt(S);
573   }
574 
575   // Some expressions are not yet handled by syntax trees.
576   bool WalkUpFromExpr(Expr *E) {
577     assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
578     Builder.foldNode(Builder.getExprRange(E),
579                      new (allocator()) syntax::UnknownExpression);
580     return true;
581   }
582 
583   bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {
584     auto Tokens = Builder.getRange(S);
585     if (Tokens.front().kind() == tok::coloncolon) {
586       // Handle nested namespace definitions. Those start at '::' token, e.g.
587       // namespace a^::b {}
588       // FIXME: build corresponding nodes for the name of this namespace.
589       return true;
590     }
591     Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition);
592     return true;
593   }
594 
595   bool TraverseParenTypeLoc(ParenTypeLoc L) {
596     // We reverse order of traversal to get the proper syntax structure.
597     if (!WalkUpFromParenTypeLoc(L))
598       return false;
599     return TraverseTypeLoc(L.getInnerLoc());
600   }
601 
602   bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {
603     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
604     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
605     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),
606                      new (allocator()) syntax::ParenDeclarator);
607     return true;
608   }
609 
610   // Declarator chunks, they are produced by type locs and some clang::Decls.
611   bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {
612     Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);
613     Builder.markExprChild(L.getSizeExpr(),
614                           syntax::NodeRole::ArraySubscript_sizeExpression);
615     Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);
616     Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),
617                      new (allocator()) syntax::ArraySubscript);
618     return true;
619   }
620 
621   bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {
622     Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
623     for (auto *P : L.getParams())
624       Builder.markDelayedChild(
625           Builder.getRange(P),
626           syntax::NodeRole::ParametersAndQualifiers_parameter);
627     Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
628     Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),
629                      new (allocator()) syntax::ParametersAndQualifiers);
630     return true;
631   }
632 
633   bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {
634     if (!L.getTypePtr()->hasTrailingReturn())
635       return WalkUpFromFunctionTypeLoc(L);
636 
637     auto TrailingReturnTokens = BuildTrailingReturn(L);
638     // Finish building the node for parameters.
639     Builder.markChild(TrailingReturnTokens,
640                       syntax::NodeRole::ParametersAndQualifiers_trailingReturn);
641     return WalkUpFromFunctionTypeLoc(L);
642   }
643 
644   bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
645     auto SR = L.getLocalSourceRange();
646     Builder.foldNode(Builder.getRange(SR.getBegin(), SR.getEnd()),
647                      new (allocator()) syntax::MemberPointer);
648     return true;
649   }
650 
651   // The code below is very regular, it could even be generated with some
652   // preprocessor magic. We merely assign roles to the corresponding children
653   // and fold resulting nodes.
654   bool WalkUpFromDeclStmt(DeclStmt *S) {
655     Builder.foldNode(Builder.getStmtRange(S),
656                      new (allocator()) syntax::DeclarationStatement);
657     return true;
658   }
659 
660   bool WalkUpFromNullStmt(NullStmt *S) {
661     Builder.foldNode(Builder.getStmtRange(S),
662                      new (allocator()) syntax::EmptyStatement);
663     return true;
664   }
665 
666   bool WalkUpFromSwitchStmt(SwitchStmt *S) {
667     Builder.markChildToken(S->getSwitchLoc(),
668                            syntax::NodeRole::IntroducerKeyword);
669     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
670     Builder.foldNode(Builder.getStmtRange(S),
671                      new (allocator()) syntax::SwitchStatement);
672     return true;
673   }
674 
675   bool WalkUpFromCaseStmt(CaseStmt *S) {
676     Builder.markChildToken(S->getKeywordLoc(),
677                            syntax::NodeRole::IntroducerKeyword);
678     Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
679     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
680     Builder.foldNode(Builder.getStmtRange(S),
681                      new (allocator()) syntax::CaseStatement);
682     return true;
683   }
684 
685   bool WalkUpFromDefaultStmt(DefaultStmt *S) {
686     Builder.markChildToken(S->getKeywordLoc(),
687                            syntax::NodeRole::IntroducerKeyword);
688     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
689     Builder.foldNode(Builder.getStmtRange(S),
690                      new (allocator()) syntax::DefaultStatement);
691     return true;
692   }
693 
694   bool WalkUpFromIfStmt(IfStmt *S) {
695     Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword);
696     Builder.markStmtChild(S->getThen(),
697                           syntax::NodeRole::IfStatement_thenStatement);
698     Builder.markChildToken(S->getElseLoc(),
699                            syntax::NodeRole::IfStatement_elseKeyword);
700     Builder.markStmtChild(S->getElse(),
701                           syntax::NodeRole::IfStatement_elseStatement);
702     Builder.foldNode(Builder.getStmtRange(S),
703                      new (allocator()) syntax::IfStatement);
704     return true;
705   }
706 
707   bool WalkUpFromForStmt(ForStmt *S) {
708     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
709     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
710     Builder.foldNode(Builder.getStmtRange(S),
711                      new (allocator()) syntax::ForStatement);
712     return true;
713   }
714 
715   bool WalkUpFromWhileStmt(WhileStmt *S) {
716     Builder.markChildToken(S->getWhileLoc(),
717                            syntax::NodeRole::IntroducerKeyword);
718     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
719     Builder.foldNode(Builder.getStmtRange(S),
720                      new (allocator()) syntax::WhileStatement);
721     return true;
722   }
723 
724   bool WalkUpFromContinueStmt(ContinueStmt *S) {
725     Builder.markChildToken(S->getContinueLoc(),
726                            syntax::NodeRole::IntroducerKeyword);
727     Builder.foldNode(Builder.getStmtRange(S),
728                      new (allocator()) syntax::ContinueStatement);
729     return true;
730   }
731 
732   bool WalkUpFromBreakStmt(BreakStmt *S) {
733     Builder.markChildToken(S->getBreakLoc(),
734                            syntax::NodeRole::IntroducerKeyword);
735     Builder.foldNode(Builder.getStmtRange(S),
736                      new (allocator()) syntax::BreakStatement);
737     return true;
738   }
739 
740   bool WalkUpFromReturnStmt(ReturnStmt *S) {
741     Builder.markChildToken(S->getReturnLoc(),
742                            syntax::NodeRole::IntroducerKeyword);
743     Builder.markExprChild(S->getRetValue(),
744                           syntax::NodeRole::ReturnStatement_value);
745     Builder.foldNode(Builder.getStmtRange(S),
746                      new (allocator()) syntax::ReturnStatement);
747     return true;
748   }
749 
750   bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
751     Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
752     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
753     Builder.foldNode(Builder.getStmtRange(S),
754                      new (allocator()) syntax::RangeBasedForStatement);
755     return true;
756   }
757 
758   bool WalkUpFromEmptyDecl(EmptyDecl *S) {
759     Builder.foldNode(Builder.getRange(S),
760                      new (allocator()) syntax::EmptyDeclaration);
761     return true;
762   }
763 
764   bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) {
765     Builder.markExprChild(S->getAssertExpr(),
766                           syntax::NodeRole::StaticAssertDeclaration_condition);
767     Builder.markExprChild(S->getMessage(),
768                           syntax::NodeRole::StaticAssertDeclaration_message);
769     Builder.foldNode(Builder.getRange(S),
770                      new (allocator()) syntax::StaticAssertDeclaration);
771     return true;
772   }
773 
774   bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {
775     Builder.foldNode(Builder.getRange(S),
776                      new (allocator()) syntax::LinkageSpecificationDeclaration);
777     return true;
778   }
779 
780   bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {
781     Builder.foldNode(Builder.getRange(S),
782                      new (allocator()) syntax::NamespaceAliasDefinition);
783     return true;
784   }
785 
786   bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {
787     Builder.foldNode(Builder.getRange(S),
788                      new (allocator()) syntax::UsingNamespaceDirective);
789     return true;
790   }
791 
792   bool WalkUpFromUsingDecl(UsingDecl *S) {
793     Builder.foldNode(Builder.getRange(S),
794                      new (allocator()) syntax::UsingDeclaration);
795     return true;
796   }
797 
798   bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {
799     Builder.foldNode(Builder.getRange(S),
800                      new (allocator()) syntax::UsingDeclaration);
801     return true;
802   }
803 
804   bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {
805     Builder.foldNode(Builder.getRange(S),
806                      new (allocator()) syntax::UsingDeclaration);
807     return true;
808   }
809 
810   bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {
811     Builder.foldNode(Builder.getRange(S),
812                      new (allocator()) syntax::TypeAliasDeclaration);
813     return true;
814   }
815 
816 private:
817   /// Returns the range of the built node.
818   llvm::ArrayRef<syntax::Token> BuildTrailingReturn(FunctionProtoTypeLoc L) {
819     assert(L.getTypePtr()->hasTrailingReturn());
820 
821     auto ReturnedType = L.getReturnLoc();
822     // Build node for the declarator, if any.
823     auto ReturnDeclaratorRange =
824         getDeclaratorRange(this->Builder.sourceManager(), ReturnedType,
825                            /*Name=*/SourceLocation(),
826                            /*Initializer=*/SourceLocation());
827     llvm::ArrayRef<syntax::Token> ReturnDeclaratorTokens;
828     if (ReturnDeclaratorRange.isValid()) {
829       ReturnDeclaratorTokens = Builder.getRange(
830           ReturnDeclaratorRange.getBegin(), ReturnDeclaratorRange.getEnd());
831       Builder.foldNode(ReturnDeclaratorTokens,
832                        new (allocator()) syntax::SimpleDeclarator);
833     }
834 
835     // Build node for trailing return type.
836     auto Return =
837         Builder.getRange(ReturnedType.getBeginLoc(), ReturnedType.getEndLoc());
838     const auto *Arrow = Return.begin() - 1;
839     assert(Arrow->kind() == tok::arrow);
840     auto Tokens = llvm::makeArrayRef(Arrow, Return.end());
841     Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow);
842     if (!ReturnDeclaratorTokens.empty())
843       Builder.markChild(ReturnDeclaratorTokens,
844                         syntax::NodeRole::TrailingReturnType_declarator);
845     Builder.foldNode(Tokens, new (allocator()) syntax::TrailingReturnType);
846     return Tokens;
847   }
848   /// A small helper to save some typing.
849   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
850 
851   syntax::TreeBuilder &Builder;
852   const LangOptions &LangOpts;
853 };
854 } // namespace
855 
856 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
857                                    syntax::Tree *New) {
858   Pending.foldChildren(Arena, Range, New);
859 }
860 
861 void syntax::TreeBuilder::noticeDeclRange(llvm::ArrayRef<syntax::Token> Range) {
862   if (Pending.extendDelayedFold(Range))
863     return;
864   Pending.foldChildrenDelayed(Range,
865                               new (allocator()) syntax::SimpleDeclaration);
866 }
867 
868 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) {
869   DeclsWithoutSemicolons.insert(D);
870 }
871 
872 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
873   if (Loc.isInvalid())
874     return;
875   Pending.assignRole(*findToken(Loc), Role);
876 }
877 
878 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) {
879   if (!T)
880     return;
881   Pending.assignRole(*T, R);
882 }
883 
884 void syntax::TreeBuilder::markChild(llvm::ArrayRef<syntax::Token> Range,
885                                     NodeRole R) {
886   Pending.assignRole(Range, R);
887 }
888 
889 void syntax::TreeBuilder::markDelayedChild(llvm::ArrayRef<syntax::Token> Range,
890                                            NodeRole R) {
891   Pending.assignRoleDelayed(Range, R);
892 }
893 
894 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
895   if (!Child)
896     return;
897 
898   auto Range = getStmtRange(Child);
899   // This is an expression in a statement position, consume the trailing
900   // semicolon and form an 'ExpressionStatement' node.
901   if (auto *E = dyn_cast<Expr>(Child)) {
902     Pending.assignRole(getExprRange(E),
903                        NodeRole::ExpressionStatement_expression);
904     // 'getRange(Stmt)' ensures this already covers a trailing semicolon.
905     Pending.foldChildren(Arena, Range,
906                          new (allocator()) syntax::ExpressionStatement);
907   }
908   Pending.assignRole(Range, Role);
909 }
910 
911 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
912   if (!Child)
913     return;
914 
915   Pending.assignRole(getExprRange(Child), Role);
916 }
917 
918 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
919   auto It = LocationToToken.find(L.getRawEncoding());
920   assert(It != LocationToToken.end());
921   return It->second;
922 }
923 
924 syntax::TranslationUnit *
925 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
926   TreeBuilder Builder(A);
927   BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
928   return std::move(Builder).finalize();
929 }
930