19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
29b3f38f9SIlya Biryukov //
39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information.
59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69b3f38f9SIlya Biryukov //
79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===//
89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h"
99b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h"
109b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h"
119b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h"
129b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h"
139b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h"
149b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h"
159b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h"
169b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h"
179b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h"
189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h"
199b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h"
209b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h"
219b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h"
229b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h"
239b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h"
249b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h"
259b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h"
269b3f38f9SIlya Biryukov #include <map>
279b3f38f9SIlya Biryukov 
289b3f38f9SIlya Biryukov using namespace clang;
299b3f38f9SIlya Biryukov 
30*58fa50f4SIlya Biryukov static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
31*58fa50f4SIlya Biryukov 
329b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang
339b3f38f9SIlya Biryukov /// AST.
349b3f38f9SIlya Biryukov ///
359b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes.
369b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST
379b3f38f9SIlya Biryukov /// node, the clients need to:
389b3f38f9SIlya Biryukov ///   - create a corresponding syntax node,
399b3f38f9SIlya Biryukov ///   - assign roles to all pending child nodes with 'markChild' and
409b3f38f9SIlya Biryukov ///     'markChildToken',
419b3f38f9SIlya Biryukov ///   - replace the child nodes with the new syntax node in the pending list
429b3f38f9SIlya Biryukov ///     with 'foldNode'.
439b3f38f9SIlya Biryukov ///
449b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node.
459b3f38f9SIlya Biryukov ///
469b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node.
479b3f38f9SIlya Biryukov class syntax::TreeBuilder {
489b3f38f9SIlya Biryukov public:
499b3f38f9SIlya Biryukov   TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
509b3f38f9SIlya Biryukov 
519b3f38f9SIlya Biryukov   llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
529b3f38f9SIlya Biryukov 
539b3f38f9SIlya Biryukov   /// Populate children for \p New node, assuming it covers tokens from \p
549b3f38f9SIlya Biryukov   /// Range.
559b3f38f9SIlya Biryukov   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
569b3f38f9SIlya Biryukov 
57*58fa50f4SIlya Biryukov   /// Mark the \p Child node with a corresponding \p Role. All marked children
58*58fa50f4SIlya Biryukov   /// should be consumed by foldNode.
59*58fa50f4SIlya Biryukov   /// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
60*58fa50f4SIlya Biryukov   ///     wraps expressions into expression statement.
61*58fa50f4SIlya Biryukov   void markStmtChild(Stmt *Child, NodeRole Role);
62*58fa50f4SIlya Biryukov   /// Should be called for expressions in non-statement position to avoid
63*58fa50f4SIlya Biryukov   /// wrapping into expression statement.
64*58fa50f4SIlya Biryukov   void markExprChild(Expr *Child, NodeRole Role);
65*58fa50f4SIlya Biryukov 
669b3f38f9SIlya Biryukov   /// Set role for a token starting at \p Loc.
679b3f38f9SIlya Biryukov   void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
689b3f38f9SIlya Biryukov 
699b3f38f9SIlya Biryukov   /// Finish building the tree and consume the root node.
709b3f38f9SIlya Biryukov   syntax::TranslationUnit *finalize() && {
719b3f38f9SIlya Biryukov     auto Tokens = Arena.tokenBuffer().expandedTokens();
72bfbf6b6cSIlya Biryukov     assert(!Tokens.empty());
73bfbf6b6cSIlya Biryukov     assert(Tokens.back().kind() == tok::eof);
74bfbf6b6cSIlya Biryukov 
759b3f38f9SIlya Biryukov     // Build the root of the tree, consuming all the children.
76bfbf6b6cSIlya Biryukov     Pending.foldChildren(Tokens.drop_back(),
779b3f38f9SIlya Biryukov                          new (Arena.allocator()) syntax::TranslationUnit);
789b3f38f9SIlya Biryukov 
799b3f38f9SIlya Biryukov     return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
809b3f38f9SIlya Biryukov   }
819b3f38f9SIlya Biryukov 
829b3f38f9SIlya Biryukov   /// getRange() finds the syntax tokens corresponding to the passed source
839b3f38f9SIlya Biryukov   /// locations.
849b3f38f9SIlya Biryukov   /// \p First is the start position of the first token and \p Last is the start
859b3f38f9SIlya Biryukov   /// position of the last token.
869b3f38f9SIlya Biryukov   llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
879b3f38f9SIlya Biryukov                                          SourceLocation Last) const {
889b3f38f9SIlya Biryukov     assert(First.isValid());
899b3f38f9SIlya Biryukov     assert(Last.isValid());
909b3f38f9SIlya Biryukov     assert(First == Last ||
919b3f38f9SIlya Biryukov            Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
929b3f38f9SIlya Biryukov     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
939b3f38f9SIlya Biryukov   }
949b3f38f9SIlya Biryukov   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
959b3f38f9SIlya Biryukov     return getRange(D->getBeginLoc(), D->getEndLoc());
969b3f38f9SIlya Biryukov   }
97*58fa50f4SIlya Biryukov   llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
98*58fa50f4SIlya Biryukov     return getRange(E->getBeginLoc(), E->getEndLoc());
99*58fa50f4SIlya Biryukov   }
100*58fa50f4SIlya Biryukov   /// Find the adjusted range for the statement, consuming the trailing
101*58fa50f4SIlya Biryukov   /// semicolon when needed.
102*58fa50f4SIlya Biryukov   llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
103*58fa50f4SIlya Biryukov     auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
104*58fa50f4SIlya Biryukov     if (isa<CompoundStmt>(S))
105*58fa50f4SIlya Biryukov       return Tokens;
106*58fa50f4SIlya Biryukov 
107*58fa50f4SIlya Biryukov     // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
108*58fa50f4SIlya Biryukov     // all statements that end with those. Consume this semicolon here.
109*58fa50f4SIlya Biryukov     //
110*58fa50f4SIlya Biryukov     // (!) statements never consume 'eof', so looking at the next token is ok.
111*58fa50f4SIlya Biryukov     if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
112*58fa50f4SIlya Biryukov       return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
113*58fa50f4SIlya Biryukov     return Tokens;
1149b3f38f9SIlya Biryukov   }
1159b3f38f9SIlya Biryukov 
1169b3f38f9SIlya Biryukov private:
1179b3f38f9SIlya Biryukov   /// Finds a token starting at \p L. The token must exist.
1189b3f38f9SIlya Biryukov   const syntax::Token *findToken(SourceLocation L) const;
1199b3f38f9SIlya Biryukov 
1209b3f38f9SIlya Biryukov   /// A collection of trees covering the input tokens.
1219b3f38f9SIlya Biryukov   /// When created, each tree corresponds to a single token in the file.
1229b3f38f9SIlya Biryukov   /// Clients call 'foldChildren' to attach one or more subtrees to a parent
1239b3f38f9SIlya Biryukov   /// node and update the list of trees accordingly.
1249b3f38f9SIlya Biryukov   ///
1259b3f38f9SIlya Biryukov   /// Ensures that added nodes properly nest and cover the whole token stream.
1269b3f38f9SIlya Biryukov   struct Forest {
1279b3f38f9SIlya Biryukov     Forest(syntax::Arena &A) {
128bfbf6b6cSIlya Biryukov       assert(!A.tokenBuffer().expandedTokens().empty());
129bfbf6b6cSIlya Biryukov       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
1309b3f38f9SIlya Biryukov       // Create all leaf nodes.
131bfbf6b6cSIlya Biryukov       // Note that we do not have 'eof' in the tree.
132bfbf6b6cSIlya Biryukov       for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
1339b3f38f9SIlya Biryukov         Trees.insert(Trees.end(),
1349b3f38f9SIlya Biryukov                      {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
1359b3f38f9SIlya Biryukov     }
1369b3f38f9SIlya Biryukov 
1379b3f38f9SIlya Biryukov     void assignRole(llvm::ArrayRef<syntax::Token> Range,
1389b3f38f9SIlya Biryukov                     syntax::NodeRole Role) {
1399b3f38f9SIlya Biryukov       assert(!Range.empty());
1409b3f38f9SIlya Biryukov       auto It = Trees.lower_bound(Range.begin());
1419b3f38f9SIlya Biryukov       assert(It != Trees.end() && "no node found");
1429b3f38f9SIlya Biryukov       assert(It->first == Range.begin() && "no child with the specified range");
1439b3f38f9SIlya Biryukov       assert((std::next(It) == Trees.end() ||
1449b3f38f9SIlya Biryukov               std::next(It)->first == Range.end()) &&
1459b3f38f9SIlya Biryukov              "no child with the specified range");
1469b3f38f9SIlya Biryukov       It->second.Role = Role;
1479b3f38f9SIlya Biryukov     }
1489b3f38f9SIlya Biryukov 
1499b3f38f9SIlya Biryukov     /// Add \p Node to the forest and fill its children nodes based on the \p
1509b3f38f9SIlya Biryukov     /// NodeRange.
1519b3f38f9SIlya Biryukov     void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
1529b3f38f9SIlya Biryukov                       syntax::Tree *Node) {
1539b3f38f9SIlya Biryukov       assert(!NodeTokens.empty());
1549b3f38f9SIlya Biryukov       assert(Node->firstChild() == nullptr && "node already has children");
1559b3f38f9SIlya Biryukov 
1569b3f38f9SIlya Biryukov       auto *FirstToken = NodeTokens.begin();
1579b3f38f9SIlya Biryukov       auto BeginChildren = Trees.lower_bound(FirstToken);
1589b3f38f9SIlya Biryukov       assert(BeginChildren != Trees.end() &&
1599b3f38f9SIlya Biryukov              BeginChildren->first == FirstToken &&
1609b3f38f9SIlya Biryukov              "fold crosses boundaries of existing subtrees");
1619b3f38f9SIlya Biryukov       auto EndChildren = Trees.lower_bound(NodeTokens.end());
1629b3f38f9SIlya Biryukov       assert((EndChildren == Trees.end() ||
1639b3f38f9SIlya Biryukov               EndChildren->first == NodeTokens.end()) &&
1649b3f38f9SIlya Biryukov              "fold crosses boundaries of existing subtrees");
1659b3f38f9SIlya Biryukov 
1669b3f38f9SIlya Biryukov       // (!) we need to go in reverse order, because we can only prepend.
1679b3f38f9SIlya Biryukov       for (auto It = EndChildren; It != BeginChildren; --It)
1689b3f38f9SIlya Biryukov         Node->prependChildLowLevel(std::prev(It)->second.Node,
1699b3f38f9SIlya Biryukov                                    std::prev(It)->second.Role);
1709b3f38f9SIlya Biryukov 
1719b3f38f9SIlya Biryukov       Trees.erase(BeginChildren, EndChildren);
1729b3f38f9SIlya Biryukov       Trees.insert({FirstToken, NodeAndRole(Node)});
1739b3f38f9SIlya Biryukov     }
1749b3f38f9SIlya Biryukov 
1759b3f38f9SIlya Biryukov     // EXPECTS: all tokens were consumed and are owned by a single root node.
1769b3f38f9SIlya Biryukov     syntax::Node *finalize() && {
1779b3f38f9SIlya Biryukov       assert(Trees.size() == 1);
1789b3f38f9SIlya Biryukov       auto *Root = Trees.begin()->second.Node;
1799b3f38f9SIlya Biryukov       Trees = {};
1809b3f38f9SIlya Biryukov       return Root;
1819b3f38f9SIlya Biryukov     }
1829b3f38f9SIlya Biryukov 
1839b3f38f9SIlya Biryukov     std::string str(const syntax::Arena &A) const {
1849b3f38f9SIlya Biryukov       std::string R;
1859b3f38f9SIlya Biryukov       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
1869b3f38f9SIlya Biryukov         unsigned CoveredTokens =
1879b3f38f9SIlya Biryukov             It != Trees.end()
1889b3f38f9SIlya Biryukov                 ? (std::next(It)->first - It->first)
1899b3f38f9SIlya Biryukov                 : A.tokenBuffer().expandedTokens().end() - It->first;
1909b3f38f9SIlya Biryukov 
1919b3f38f9SIlya Biryukov         R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
1929b3f38f9SIlya Biryukov                            It->second.Node->kind(),
1939b3f38f9SIlya Biryukov                            It->first->text(A.sourceManager()), CoveredTokens);
1949b3f38f9SIlya Biryukov         R += It->second.Node->dump(A);
1959b3f38f9SIlya Biryukov       }
1969b3f38f9SIlya Biryukov       return R;
1979b3f38f9SIlya Biryukov     }
1989b3f38f9SIlya Biryukov 
1999b3f38f9SIlya Biryukov   private:
2009b3f38f9SIlya Biryukov     /// A with a role that should be assigned to it when adding to a parent.
2019b3f38f9SIlya Biryukov     struct NodeAndRole {
2029b3f38f9SIlya Biryukov       explicit NodeAndRole(syntax::Node *Node)
20351dad419SIlya Biryukov           : Node(Node), Role(NodeRole::Unknown) {}
2049b3f38f9SIlya Biryukov 
2059b3f38f9SIlya Biryukov       syntax::Node *Node;
2069b3f38f9SIlya Biryukov       NodeRole Role;
2079b3f38f9SIlya Biryukov     };
2089b3f38f9SIlya Biryukov 
2099b3f38f9SIlya Biryukov     /// Maps from the start token to a subtree starting at that token.
2109b3f38f9SIlya Biryukov     /// FIXME: storing the end tokens is redundant.
2119b3f38f9SIlya Biryukov     /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
2129b3f38f9SIlya Biryukov     std::map<const syntax::Token *, NodeAndRole> Trees;
2139b3f38f9SIlya Biryukov   };
2149b3f38f9SIlya Biryukov 
2159b3f38f9SIlya Biryukov   /// For debugging purposes.
2169b3f38f9SIlya Biryukov   std::string str() { return Pending.str(Arena); }
2179b3f38f9SIlya Biryukov 
2189b3f38f9SIlya Biryukov   syntax::Arena &Arena;
2199b3f38f9SIlya Biryukov   Forest Pending;
2209b3f38f9SIlya Biryukov };
2219b3f38f9SIlya Biryukov 
2229b3f38f9SIlya Biryukov namespace {
2239b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
2249b3f38f9SIlya Biryukov public:
2259b3f38f9SIlya Biryukov   explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
2269b3f38f9SIlya Biryukov       : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
2279b3f38f9SIlya Biryukov 
2289b3f38f9SIlya Biryukov   bool shouldTraversePostOrder() const { return true; }
2299b3f38f9SIlya Biryukov 
2309b3f38f9SIlya Biryukov   bool TraverseDecl(Decl *D) {
2319b3f38f9SIlya Biryukov     if (!D || isa<TranslationUnitDecl>(D))
2329b3f38f9SIlya Biryukov       return RecursiveASTVisitor::TraverseDecl(D);
2339b3f38f9SIlya Biryukov     if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
2349b3f38f9SIlya Biryukov       return true; // Only build top-level decls for now, do not recurse.
2359b3f38f9SIlya Biryukov     return RecursiveASTVisitor::TraverseDecl(D);
2369b3f38f9SIlya Biryukov   }
2379b3f38f9SIlya Biryukov 
2389b3f38f9SIlya Biryukov   bool VisitDecl(Decl *D) {
2399b3f38f9SIlya Biryukov     assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
2409b3f38f9SIlya Biryukov            "expected a top-level decl");
2419b3f38f9SIlya Biryukov     assert(!D->isImplicit());
2429b3f38f9SIlya Biryukov     Builder.foldNode(Builder.getRange(D),
2439b3f38f9SIlya Biryukov                      new (allocator()) syntax::TopLevelDeclaration());
2449b3f38f9SIlya Biryukov     return true;
2459b3f38f9SIlya Biryukov   }
2469b3f38f9SIlya Biryukov 
2479b3f38f9SIlya Biryukov   bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
2489b3f38f9SIlya Biryukov     // (!) we do not want to call VisitDecl(), the declaration for translation
2499b3f38f9SIlya Biryukov     // unit is built by finalize().
2509b3f38f9SIlya Biryukov     return true;
2519b3f38f9SIlya Biryukov   }
2529b3f38f9SIlya Biryukov 
2539b3f38f9SIlya Biryukov   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
25451dad419SIlya Biryukov     using NodeRole = syntax::NodeRole;
2559b3f38f9SIlya Biryukov 
256*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen);
257*58fa50f4SIlya Biryukov     for (auto *Child : S->body())
258*58fa50f4SIlya Biryukov       Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
25951dad419SIlya Biryukov     Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
260*58fa50f4SIlya Biryukov                            NodeRole::CloseParen);
2619b3f38f9SIlya Biryukov 
262*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
2639b3f38f9SIlya Biryukov                      new (allocator()) syntax::CompoundStatement);
2649b3f38f9SIlya Biryukov     return true;
2659b3f38f9SIlya Biryukov   }
2669b3f38f9SIlya Biryukov 
267*58fa50f4SIlya Biryukov   // Some statements are not yet handled by syntax trees.
268*58fa50f4SIlya Biryukov   bool WalkUpFromStmt(Stmt *S) {
269*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
270*58fa50f4SIlya Biryukov                      new (allocator()) syntax::UnknownStatement);
271*58fa50f4SIlya Biryukov     return true;
272*58fa50f4SIlya Biryukov   }
273*58fa50f4SIlya Biryukov 
274*58fa50f4SIlya Biryukov   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
275*58fa50f4SIlya Biryukov     // We override to traverse range initializer as VarDecl.
276*58fa50f4SIlya Biryukov     // RAV traverses it as a statement, we produce invalid node kinds in that
277*58fa50f4SIlya Biryukov     // case.
278*58fa50f4SIlya Biryukov     // FIXME: should do this in RAV instead?
279*58fa50f4SIlya Biryukov     if (S->getInit() && !TraverseStmt(S->getInit()))
280*58fa50f4SIlya Biryukov       return false;
281*58fa50f4SIlya Biryukov     if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
282*58fa50f4SIlya Biryukov       return false;
283*58fa50f4SIlya Biryukov     if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
284*58fa50f4SIlya Biryukov       return false;
285*58fa50f4SIlya Biryukov     if (S->getBody() && !TraverseStmt(S->getBody()))
286*58fa50f4SIlya Biryukov       return false;
287*58fa50f4SIlya Biryukov     return true;
288*58fa50f4SIlya Biryukov   }
289*58fa50f4SIlya Biryukov 
290*58fa50f4SIlya Biryukov   bool TraverseStmt(Stmt *S) {
291*58fa50f4SIlya Biryukov     if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
292*58fa50f4SIlya Biryukov       // (!) do not recurse into subexpressions.
293*58fa50f4SIlya Biryukov       // we do not have syntax trees for expressions yet, so we only want to see
294*58fa50f4SIlya Biryukov       // the first top-level expression.
295*58fa50f4SIlya Biryukov       return WalkUpFromExpr(E->IgnoreImplicit());
296*58fa50f4SIlya Biryukov     }
297*58fa50f4SIlya Biryukov     return RecursiveASTVisitor::TraverseStmt(S);
298*58fa50f4SIlya Biryukov   }
299*58fa50f4SIlya Biryukov 
300*58fa50f4SIlya Biryukov   // Some expressions are not yet handled by syntax trees.
301*58fa50f4SIlya Biryukov   bool WalkUpFromExpr(Expr *E) {
302*58fa50f4SIlya Biryukov     assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
303*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getExprRange(E),
304*58fa50f4SIlya Biryukov                      new (allocator()) syntax::UnknownExpression);
305*58fa50f4SIlya Biryukov     return true;
306*58fa50f4SIlya Biryukov   }
307*58fa50f4SIlya Biryukov 
308*58fa50f4SIlya Biryukov   // The code below is very regular, it could even be generated with some
309*58fa50f4SIlya Biryukov   // preprocessor magic. We merely assign roles to the corresponding children
310*58fa50f4SIlya Biryukov   // and fold resulting nodes.
311*58fa50f4SIlya Biryukov   bool WalkUpFromDeclStmt(DeclStmt *S) {
312*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
313*58fa50f4SIlya Biryukov                      new (allocator()) syntax::DeclarationStatement);
314*58fa50f4SIlya Biryukov     return true;
315*58fa50f4SIlya Biryukov   }
316*58fa50f4SIlya Biryukov 
317*58fa50f4SIlya Biryukov   bool WalkUpFromNullStmt(NullStmt *S) {
318*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
319*58fa50f4SIlya Biryukov                      new (allocator()) syntax::EmptyStatement);
320*58fa50f4SIlya Biryukov     return true;
321*58fa50f4SIlya Biryukov   }
322*58fa50f4SIlya Biryukov 
323*58fa50f4SIlya Biryukov   bool WalkUpFromSwitchStmt(SwitchStmt *S) {
324*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch,
325*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
326*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
327*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
328*58fa50f4SIlya Biryukov                      new (allocator()) syntax::SwitchStatement);
329*58fa50f4SIlya Biryukov     return true;
330*58fa50f4SIlya Biryukov   }
331*58fa50f4SIlya Biryukov 
332*58fa50f4SIlya Biryukov   bool WalkUpFromCaseStmt(CaseStmt *S) {
333*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getKeywordLoc(), tok::kw_case,
334*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
335*58fa50f4SIlya Biryukov     Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
336*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
337*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
338*58fa50f4SIlya Biryukov                      new (allocator()) syntax::CaseStatement);
339*58fa50f4SIlya Biryukov     return true;
340*58fa50f4SIlya Biryukov   }
341*58fa50f4SIlya Biryukov 
342*58fa50f4SIlya Biryukov   bool WalkUpFromDefaultStmt(DefaultStmt *S) {
343*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getKeywordLoc(), tok::kw_default,
344*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
345*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
346*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
347*58fa50f4SIlya Biryukov                      new (allocator()) syntax::DefaultStatement);
348*58fa50f4SIlya Biryukov     return true;
349*58fa50f4SIlya Biryukov   }
350*58fa50f4SIlya Biryukov 
351*58fa50f4SIlya Biryukov   bool WalkUpFromIfStmt(IfStmt *S) {
352*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getIfLoc(), tok::kw_if,
353*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
354*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getThen(),
355*58fa50f4SIlya Biryukov                           syntax::NodeRole::IfStatement_thenStatement);
356*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getElseLoc(), tok::kw_else,
357*58fa50f4SIlya Biryukov                            syntax::NodeRole::IfStatement_elseKeyword);
358*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getElse(),
359*58fa50f4SIlya Biryukov                           syntax::NodeRole::IfStatement_elseStatement);
360*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
361*58fa50f4SIlya Biryukov                      new (allocator()) syntax::IfStatement);
362*58fa50f4SIlya Biryukov     return true;
363*58fa50f4SIlya Biryukov   }
364*58fa50f4SIlya Biryukov 
365*58fa50f4SIlya Biryukov   bool WalkUpFromForStmt(ForStmt *S) {
366*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getForLoc(), tok::kw_for,
367*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
368*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
369*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
370*58fa50f4SIlya Biryukov                      new (allocator()) syntax::ForStatement);
371*58fa50f4SIlya Biryukov     return true;
372*58fa50f4SIlya Biryukov   }
373*58fa50f4SIlya Biryukov 
374*58fa50f4SIlya Biryukov   bool WalkUpFromWhileStmt(WhileStmt *S) {
375*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getWhileLoc(), tok::kw_while,
376*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
377*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
378*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
379*58fa50f4SIlya Biryukov                      new (allocator()) syntax::WhileStatement);
380*58fa50f4SIlya Biryukov     return true;
381*58fa50f4SIlya Biryukov   }
382*58fa50f4SIlya Biryukov 
383*58fa50f4SIlya Biryukov   bool WalkUpFromContinueStmt(ContinueStmt *S) {
384*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getContinueLoc(), tok::kw_continue,
385*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
386*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
387*58fa50f4SIlya Biryukov                      new (allocator()) syntax::ContinueStatement);
388*58fa50f4SIlya Biryukov     return true;
389*58fa50f4SIlya Biryukov   }
390*58fa50f4SIlya Biryukov 
391*58fa50f4SIlya Biryukov   bool WalkUpFromBreakStmt(BreakStmt *S) {
392*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getBreakLoc(), tok::kw_break,
393*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
394*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
395*58fa50f4SIlya Biryukov                      new (allocator()) syntax::BreakStatement);
396*58fa50f4SIlya Biryukov     return true;
397*58fa50f4SIlya Biryukov   }
398*58fa50f4SIlya Biryukov 
399*58fa50f4SIlya Biryukov   bool WalkUpFromReturnStmt(ReturnStmt *S) {
400*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getReturnLoc(), tok::kw_return,
401*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
402*58fa50f4SIlya Biryukov     Builder.markExprChild(S->getRetValue(),
403*58fa50f4SIlya Biryukov                           syntax::NodeRole::ReturnStatement_value);
404*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
405*58fa50f4SIlya Biryukov                      new (allocator()) syntax::ReturnStatement);
406*58fa50f4SIlya Biryukov     return true;
407*58fa50f4SIlya Biryukov   }
408*58fa50f4SIlya Biryukov 
409*58fa50f4SIlya Biryukov   bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
410*58fa50f4SIlya Biryukov     Builder.markChildToken(S->getForLoc(), tok::kw_for,
411*58fa50f4SIlya Biryukov                            syntax::NodeRole::IntroducerKeyword);
412*58fa50f4SIlya Biryukov     Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
413*58fa50f4SIlya Biryukov     Builder.foldNode(Builder.getStmtRange(S),
414*58fa50f4SIlya Biryukov                      new (allocator()) syntax::RangeBasedForStatement);
415*58fa50f4SIlya Biryukov     return true;
416*58fa50f4SIlya Biryukov   }
417*58fa50f4SIlya Biryukov 
4189b3f38f9SIlya Biryukov private:
4199b3f38f9SIlya Biryukov   /// A small helper to save some typing.
4209b3f38f9SIlya Biryukov   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
4219b3f38f9SIlya Biryukov 
4229b3f38f9SIlya Biryukov   syntax::TreeBuilder &Builder;
4239b3f38f9SIlya Biryukov   const LangOptions &LangOpts;
4249b3f38f9SIlya Biryukov };
4259b3f38f9SIlya Biryukov } // namespace
4269b3f38f9SIlya Biryukov 
4279b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
4289b3f38f9SIlya Biryukov                                    syntax::Tree *New) {
4299b3f38f9SIlya Biryukov   Pending.foldChildren(Range, New);
4309b3f38f9SIlya Biryukov }
4319b3f38f9SIlya Biryukov 
4329b3f38f9SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc,
4339b3f38f9SIlya Biryukov                                          tok::TokenKind Kind, NodeRole Role) {
4349b3f38f9SIlya Biryukov   if (Loc.isInvalid())
4359b3f38f9SIlya Biryukov     return;
4369b3f38f9SIlya Biryukov   Pending.assignRole(*findToken(Loc), Role);
4379b3f38f9SIlya Biryukov }
4389b3f38f9SIlya Biryukov 
439*58fa50f4SIlya Biryukov void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
440*58fa50f4SIlya Biryukov   if (!Child)
441*58fa50f4SIlya Biryukov     return;
442*58fa50f4SIlya Biryukov 
443*58fa50f4SIlya Biryukov   auto Range = getStmtRange(Child);
444*58fa50f4SIlya Biryukov   // This is an expression in a statement position, consume the trailing
445*58fa50f4SIlya Biryukov   // semicolon and form an 'ExpressionStatement' node.
446*58fa50f4SIlya Biryukov   if (auto *E = dyn_cast<Expr>(Child)) {
447*58fa50f4SIlya Biryukov     Pending.assignRole(getExprRange(E),
448*58fa50f4SIlya Biryukov                        NodeRole::ExpressionStatement_expression);
449*58fa50f4SIlya Biryukov     // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
450*58fa50f4SIlya Biryukov     Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
451*58fa50f4SIlya Biryukov   }
452*58fa50f4SIlya Biryukov   Pending.assignRole(Range, Role);
453*58fa50f4SIlya Biryukov }
454*58fa50f4SIlya Biryukov 
455*58fa50f4SIlya Biryukov void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
456*58fa50f4SIlya Biryukov   Pending.assignRole(getExprRange(Child), Role);
457*58fa50f4SIlya Biryukov }
458*58fa50f4SIlya Biryukov 
4599b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
4609b3f38f9SIlya Biryukov   auto Tokens = Arena.tokenBuffer().expandedTokens();
4619b3f38f9SIlya Biryukov   auto &SM = Arena.sourceManager();
4629b3f38f9SIlya Biryukov   auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
4639b3f38f9SIlya Biryukov     return SM.isBeforeInTranslationUnit(T.location(), L);
4649b3f38f9SIlya Biryukov   });
4659b3f38f9SIlya Biryukov   assert(It != Tokens.end());
4669b3f38f9SIlya Biryukov   assert(It->location() == L);
4679b3f38f9SIlya Biryukov   return &*It;
4689b3f38f9SIlya Biryukov }
4699b3f38f9SIlya Biryukov 
4709b3f38f9SIlya Biryukov syntax::TranslationUnit *
4719b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
4729b3f38f9SIlya Biryukov   TreeBuilder Builder(A);
4739b3f38f9SIlya Biryukov   BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
4749b3f38f9SIlya Biryukov   return std::move(Builder).finalize();
4759b3f38f9SIlya Biryukov }
476