19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 29b3f38f9SIlya Biryukov // 39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information. 59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69b3f38f9SIlya Biryukov // 79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===// 89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h" 99b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h" 109b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h" 119b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h" 129b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h" 139b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h" 149b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h" 159b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h" 169b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h" 179b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h" 189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h" 199b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h" 209b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h" 219b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h" 229b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h" 239b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h" 249b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h" 259b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h" 269b3f38f9SIlya Biryukov #include <map> 279b3f38f9SIlya Biryukov 289b3f38f9SIlya Biryukov using namespace clang; 299b3f38f9SIlya Biryukov 30*58fa50f4SIlya Biryukov static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 31*58fa50f4SIlya Biryukov 329b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang 339b3f38f9SIlya Biryukov /// AST. 349b3f38f9SIlya Biryukov /// 359b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes. 369b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST 379b3f38f9SIlya Biryukov /// node, the clients need to: 389b3f38f9SIlya Biryukov /// - create a corresponding syntax node, 399b3f38f9SIlya Biryukov /// - assign roles to all pending child nodes with 'markChild' and 409b3f38f9SIlya Biryukov /// 'markChildToken', 419b3f38f9SIlya Biryukov /// - replace the child nodes with the new syntax node in the pending list 429b3f38f9SIlya Biryukov /// with 'foldNode'. 439b3f38f9SIlya Biryukov /// 449b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node. 459b3f38f9SIlya Biryukov /// 469b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node. 479b3f38f9SIlya Biryukov class syntax::TreeBuilder { 489b3f38f9SIlya Biryukov public: 499b3f38f9SIlya Biryukov TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 509b3f38f9SIlya Biryukov 519b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 529b3f38f9SIlya Biryukov 539b3f38f9SIlya Biryukov /// Populate children for \p New node, assuming it covers tokens from \p 549b3f38f9SIlya Biryukov /// Range. 559b3f38f9SIlya Biryukov void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 569b3f38f9SIlya Biryukov 57*58fa50f4SIlya Biryukov /// Mark the \p Child node with a corresponding \p Role. All marked children 58*58fa50f4SIlya Biryukov /// should be consumed by foldNode. 59*58fa50f4SIlya Biryukov /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 60*58fa50f4SIlya Biryukov /// wraps expressions into expression statement. 61*58fa50f4SIlya Biryukov void markStmtChild(Stmt *Child, NodeRole Role); 62*58fa50f4SIlya Biryukov /// Should be called for expressions in non-statement position to avoid 63*58fa50f4SIlya Biryukov /// wrapping into expression statement. 64*58fa50f4SIlya Biryukov void markExprChild(Expr *Child, NodeRole Role); 65*58fa50f4SIlya Biryukov 669b3f38f9SIlya Biryukov /// Set role for a token starting at \p Loc. 679b3f38f9SIlya Biryukov void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); 689b3f38f9SIlya Biryukov 699b3f38f9SIlya Biryukov /// Finish building the tree and consume the root node. 709b3f38f9SIlya Biryukov syntax::TranslationUnit *finalize() && { 719b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 72bfbf6b6cSIlya Biryukov assert(!Tokens.empty()); 73bfbf6b6cSIlya Biryukov assert(Tokens.back().kind() == tok::eof); 74bfbf6b6cSIlya Biryukov 759b3f38f9SIlya Biryukov // Build the root of the tree, consuming all the children. 76bfbf6b6cSIlya Biryukov Pending.foldChildren(Tokens.drop_back(), 779b3f38f9SIlya Biryukov new (Arena.allocator()) syntax::TranslationUnit); 789b3f38f9SIlya Biryukov 799b3f38f9SIlya Biryukov return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 809b3f38f9SIlya Biryukov } 819b3f38f9SIlya Biryukov 829b3f38f9SIlya Biryukov /// getRange() finds the syntax tokens corresponding to the passed source 839b3f38f9SIlya Biryukov /// locations. 849b3f38f9SIlya Biryukov /// \p First is the start position of the first token and \p Last is the start 859b3f38f9SIlya Biryukov /// position of the last token. 869b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 879b3f38f9SIlya Biryukov SourceLocation Last) const { 889b3f38f9SIlya Biryukov assert(First.isValid()); 899b3f38f9SIlya Biryukov assert(Last.isValid()); 909b3f38f9SIlya Biryukov assert(First == Last || 919b3f38f9SIlya Biryukov Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 929b3f38f9SIlya Biryukov return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 939b3f38f9SIlya Biryukov } 949b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 959b3f38f9SIlya Biryukov return getRange(D->getBeginLoc(), D->getEndLoc()); 969b3f38f9SIlya Biryukov } 97*58fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 98*58fa50f4SIlya Biryukov return getRange(E->getBeginLoc(), E->getEndLoc()); 99*58fa50f4SIlya Biryukov } 100*58fa50f4SIlya Biryukov /// Find the adjusted range for the statement, consuming the trailing 101*58fa50f4SIlya Biryukov /// semicolon when needed. 102*58fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 103*58fa50f4SIlya Biryukov auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 104*58fa50f4SIlya Biryukov if (isa<CompoundStmt>(S)) 105*58fa50f4SIlya Biryukov return Tokens; 106*58fa50f4SIlya Biryukov 107*58fa50f4SIlya Biryukov // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 108*58fa50f4SIlya Biryukov // all statements that end with those. Consume this semicolon here. 109*58fa50f4SIlya Biryukov // 110*58fa50f4SIlya Biryukov // (!) statements never consume 'eof', so looking at the next token is ok. 111*58fa50f4SIlya Biryukov if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 112*58fa50f4SIlya Biryukov return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 113*58fa50f4SIlya Biryukov return Tokens; 1149b3f38f9SIlya Biryukov } 1159b3f38f9SIlya Biryukov 1169b3f38f9SIlya Biryukov private: 1179b3f38f9SIlya Biryukov /// Finds a token starting at \p L. The token must exist. 1189b3f38f9SIlya Biryukov const syntax::Token *findToken(SourceLocation L) const; 1199b3f38f9SIlya Biryukov 1209b3f38f9SIlya Biryukov /// A collection of trees covering the input tokens. 1219b3f38f9SIlya Biryukov /// When created, each tree corresponds to a single token in the file. 1229b3f38f9SIlya Biryukov /// Clients call 'foldChildren' to attach one or more subtrees to a parent 1239b3f38f9SIlya Biryukov /// node and update the list of trees accordingly. 1249b3f38f9SIlya Biryukov /// 1259b3f38f9SIlya Biryukov /// Ensures that added nodes properly nest and cover the whole token stream. 1269b3f38f9SIlya Biryukov struct Forest { 1279b3f38f9SIlya Biryukov Forest(syntax::Arena &A) { 128bfbf6b6cSIlya Biryukov assert(!A.tokenBuffer().expandedTokens().empty()); 129bfbf6b6cSIlya Biryukov assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 1309b3f38f9SIlya Biryukov // Create all leaf nodes. 131bfbf6b6cSIlya Biryukov // Note that we do not have 'eof' in the tree. 132bfbf6b6cSIlya Biryukov for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 1339b3f38f9SIlya Biryukov Trees.insert(Trees.end(), 1349b3f38f9SIlya Biryukov {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 1359b3f38f9SIlya Biryukov } 1369b3f38f9SIlya Biryukov 1379b3f38f9SIlya Biryukov void assignRole(llvm::ArrayRef<syntax::Token> Range, 1389b3f38f9SIlya Biryukov syntax::NodeRole Role) { 1399b3f38f9SIlya Biryukov assert(!Range.empty()); 1409b3f38f9SIlya Biryukov auto It = Trees.lower_bound(Range.begin()); 1419b3f38f9SIlya Biryukov assert(It != Trees.end() && "no node found"); 1429b3f38f9SIlya Biryukov assert(It->first == Range.begin() && "no child with the specified range"); 1439b3f38f9SIlya Biryukov assert((std::next(It) == Trees.end() || 1449b3f38f9SIlya Biryukov std::next(It)->first == Range.end()) && 1459b3f38f9SIlya Biryukov "no child with the specified range"); 1469b3f38f9SIlya Biryukov It->second.Role = Role; 1479b3f38f9SIlya Biryukov } 1489b3f38f9SIlya Biryukov 1499b3f38f9SIlya Biryukov /// Add \p Node to the forest and fill its children nodes based on the \p 1509b3f38f9SIlya Biryukov /// NodeRange. 1519b3f38f9SIlya Biryukov void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens, 1529b3f38f9SIlya Biryukov syntax::Tree *Node) { 1539b3f38f9SIlya Biryukov assert(!NodeTokens.empty()); 1549b3f38f9SIlya Biryukov assert(Node->firstChild() == nullptr && "node already has children"); 1559b3f38f9SIlya Biryukov 1569b3f38f9SIlya Biryukov auto *FirstToken = NodeTokens.begin(); 1579b3f38f9SIlya Biryukov auto BeginChildren = Trees.lower_bound(FirstToken); 1589b3f38f9SIlya Biryukov assert(BeginChildren != Trees.end() && 1599b3f38f9SIlya Biryukov BeginChildren->first == FirstToken && 1609b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1619b3f38f9SIlya Biryukov auto EndChildren = Trees.lower_bound(NodeTokens.end()); 1629b3f38f9SIlya Biryukov assert((EndChildren == Trees.end() || 1639b3f38f9SIlya Biryukov EndChildren->first == NodeTokens.end()) && 1649b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1659b3f38f9SIlya Biryukov 1669b3f38f9SIlya Biryukov // (!) we need to go in reverse order, because we can only prepend. 1679b3f38f9SIlya Biryukov for (auto It = EndChildren; It != BeginChildren; --It) 1689b3f38f9SIlya Biryukov Node->prependChildLowLevel(std::prev(It)->second.Node, 1699b3f38f9SIlya Biryukov std::prev(It)->second.Role); 1709b3f38f9SIlya Biryukov 1719b3f38f9SIlya Biryukov Trees.erase(BeginChildren, EndChildren); 1729b3f38f9SIlya Biryukov Trees.insert({FirstToken, NodeAndRole(Node)}); 1739b3f38f9SIlya Biryukov } 1749b3f38f9SIlya Biryukov 1759b3f38f9SIlya Biryukov // EXPECTS: all tokens were consumed and are owned by a single root node. 1769b3f38f9SIlya Biryukov syntax::Node *finalize() && { 1779b3f38f9SIlya Biryukov assert(Trees.size() == 1); 1789b3f38f9SIlya Biryukov auto *Root = Trees.begin()->second.Node; 1799b3f38f9SIlya Biryukov Trees = {}; 1809b3f38f9SIlya Biryukov return Root; 1819b3f38f9SIlya Biryukov } 1829b3f38f9SIlya Biryukov 1839b3f38f9SIlya Biryukov std::string str(const syntax::Arena &A) const { 1849b3f38f9SIlya Biryukov std::string R; 1859b3f38f9SIlya Biryukov for (auto It = Trees.begin(); It != Trees.end(); ++It) { 1869b3f38f9SIlya Biryukov unsigned CoveredTokens = 1879b3f38f9SIlya Biryukov It != Trees.end() 1889b3f38f9SIlya Biryukov ? (std::next(It)->first - It->first) 1899b3f38f9SIlya Biryukov : A.tokenBuffer().expandedTokens().end() - It->first; 1909b3f38f9SIlya Biryukov 1919b3f38f9SIlya Biryukov R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 1929b3f38f9SIlya Biryukov It->second.Node->kind(), 1939b3f38f9SIlya Biryukov It->first->text(A.sourceManager()), CoveredTokens); 1949b3f38f9SIlya Biryukov R += It->second.Node->dump(A); 1959b3f38f9SIlya Biryukov } 1969b3f38f9SIlya Biryukov return R; 1979b3f38f9SIlya Biryukov } 1989b3f38f9SIlya Biryukov 1999b3f38f9SIlya Biryukov private: 2009b3f38f9SIlya Biryukov /// A with a role that should be assigned to it when adding to a parent. 2019b3f38f9SIlya Biryukov struct NodeAndRole { 2029b3f38f9SIlya Biryukov explicit NodeAndRole(syntax::Node *Node) 20351dad419SIlya Biryukov : Node(Node), Role(NodeRole::Unknown) {} 2049b3f38f9SIlya Biryukov 2059b3f38f9SIlya Biryukov syntax::Node *Node; 2069b3f38f9SIlya Biryukov NodeRole Role; 2079b3f38f9SIlya Biryukov }; 2089b3f38f9SIlya Biryukov 2099b3f38f9SIlya Biryukov /// Maps from the start token to a subtree starting at that token. 2109b3f38f9SIlya Biryukov /// FIXME: storing the end tokens is redundant. 2119b3f38f9SIlya Biryukov /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 2129b3f38f9SIlya Biryukov std::map<const syntax::Token *, NodeAndRole> Trees; 2139b3f38f9SIlya Biryukov }; 2149b3f38f9SIlya Biryukov 2159b3f38f9SIlya Biryukov /// For debugging purposes. 2169b3f38f9SIlya Biryukov std::string str() { return Pending.str(Arena); } 2179b3f38f9SIlya Biryukov 2189b3f38f9SIlya Biryukov syntax::Arena &Arena; 2199b3f38f9SIlya Biryukov Forest Pending; 2209b3f38f9SIlya Biryukov }; 2219b3f38f9SIlya Biryukov 2229b3f38f9SIlya Biryukov namespace { 2239b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 2249b3f38f9SIlya Biryukov public: 2259b3f38f9SIlya Biryukov explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 2269b3f38f9SIlya Biryukov : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 2279b3f38f9SIlya Biryukov 2289b3f38f9SIlya Biryukov bool shouldTraversePostOrder() const { return true; } 2299b3f38f9SIlya Biryukov 2309b3f38f9SIlya Biryukov bool TraverseDecl(Decl *D) { 2319b3f38f9SIlya Biryukov if (!D || isa<TranslationUnitDecl>(D)) 2329b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2339b3f38f9SIlya Biryukov if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext())) 2349b3f38f9SIlya Biryukov return true; // Only build top-level decls for now, do not recurse. 2359b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2369b3f38f9SIlya Biryukov } 2379b3f38f9SIlya Biryukov 2389b3f38f9SIlya Biryukov bool VisitDecl(Decl *D) { 2399b3f38f9SIlya Biryukov assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) && 2409b3f38f9SIlya Biryukov "expected a top-level decl"); 2419b3f38f9SIlya Biryukov assert(!D->isImplicit()); 2429b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(D), 2439b3f38f9SIlya Biryukov new (allocator()) syntax::TopLevelDeclaration()); 2449b3f38f9SIlya Biryukov return true; 2459b3f38f9SIlya Biryukov } 2469b3f38f9SIlya Biryukov 2479b3f38f9SIlya Biryukov bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 2489b3f38f9SIlya Biryukov // (!) we do not want to call VisitDecl(), the declaration for translation 2499b3f38f9SIlya Biryukov // unit is built by finalize(). 2509b3f38f9SIlya Biryukov return true; 2519b3f38f9SIlya Biryukov } 2529b3f38f9SIlya Biryukov 2539b3f38f9SIlya Biryukov bool WalkUpFromCompoundStmt(CompoundStmt *S) { 25451dad419SIlya Biryukov using NodeRole = syntax::NodeRole; 2559b3f38f9SIlya Biryukov 256*58fa50f4SIlya Biryukov Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); 257*58fa50f4SIlya Biryukov for (auto *Child : S->body()) 258*58fa50f4SIlya Biryukov Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 25951dad419SIlya Biryukov Builder.markChildToken(S->getRBracLoc(), tok::r_brace, 260*58fa50f4SIlya Biryukov NodeRole::CloseParen); 2619b3f38f9SIlya Biryukov 262*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 2639b3f38f9SIlya Biryukov new (allocator()) syntax::CompoundStatement); 2649b3f38f9SIlya Biryukov return true; 2659b3f38f9SIlya Biryukov } 2669b3f38f9SIlya Biryukov 267*58fa50f4SIlya Biryukov // Some statements are not yet handled by syntax trees. 268*58fa50f4SIlya Biryukov bool WalkUpFromStmt(Stmt *S) { 269*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 270*58fa50f4SIlya Biryukov new (allocator()) syntax::UnknownStatement); 271*58fa50f4SIlya Biryukov return true; 272*58fa50f4SIlya Biryukov } 273*58fa50f4SIlya Biryukov 274*58fa50f4SIlya Biryukov bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 275*58fa50f4SIlya Biryukov // We override to traverse range initializer as VarDecl. 276*58fa50f4SIlya Biryukov // RAV traverses it as a statement, we produce invalid node kinds in that 277*58fa50f4SIlya Biryukov // case. 278*58fa50f4SIlya Biryukov // FIXME: should do this in RAV instead? 279*58fa50f4SIlya Biryukov if (S->getInit() && !TraverseStmt(S->getInit())) 280*58fa50f4SIlya Biryukov return false; 281*58fa50f4SIlya Biryukov if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 282*58fa50f4SIlya Biryukov return false; 283*58fa50f4SIlya Biryukov if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 284*58fa50f4SIlya Biryukov return false; 285*58fa50f4SIlya Biryukov if (S->getBody() && !TraverseStmt(S->getBody())) 286*58fa50f4SIlya Biryukov return false; 287*58fa50f4SIlya Biryukov return true; 288*58fa50f4SIlya Biryukov } 289*58fa50f4SIlya Biryukov 290*58fa50f4SIlya Biryukov bool TraverseStmt(Stmt *S) { 291*58fa50f4SIlya Biryukov if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 292*58fa50f4SIlya Biryukov // (!) do not recurse into subexpressions. 293*58fa50f4SIlya Biryukov // we do not have syntax trees for expressions yet, so we only want to see 294*58fa50f4SIlya Biryukov // the first top-level expression. 295*58fa50f4SIlya Biryukov return WalkUpFromExpr(E->IgnoreImplicit()); 296*58fa50f4SIlya Biryukov } 297*58fa50f4SIlya Biryukov return RecursiveASTVisitor::TraverseStmt(S); 298*58fa50f4SIlya Biryukov } 299*58fa50f4SIlya Biryukov 300*58fa50f4SIlya Biryukov // Some expressions are not yet handled by syntax trees. 301*58fa50f4SIlya Biryukov bool WalkUpFromExpr(Expr *E) { 302*58fa50f4SIlya Biryukov assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 303*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getExprRange(E), 304*58fa50f4SIlya Biryukov new (allocator()) syntax::UnknownExpression); 305*58fa50f4SIlya Biryukov return true; 306*58fa50f4SIlya Biryukov } 307*58fa50f4SIlya Biryukov 308*58fa50f4SIlya Biryukov // The code below is very regular, it could even be generated with some 309*58fa50f4SIlya Biryukov // preprocessor magic. We merely assign roles to the corresponding children 310*58fa50f4SIlya Biryukov // and fold resulting nodes. 311*58fa50f4SIlya Biryukov bool WalkUpFromDeclStmt(DeclStmt *S) { 312*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 313*58fa50f4SIlya Biryukov new (allocator()) syntax::DeclarationStatement); 314*58fa50f4SIlya Biryukov return true; 315*58fa50f4SIlya Biryukov } 316*58fa50f4SIlya Biryukov 317*58fa50f4SIlya Biryukov bool WalkUpFromNullStmt(NullStmt *S) { 318*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 319*58fa50f4SIlya Biryukov new (allocator()) syntax::EmptyStatement); 320*58fa50f4SIlya Biryukov return true; 321*58fa50f4SIlya Biryukov } 322*58fa50f4SIlya Biryukov 323*58fa50f4SIlya Biryukov bool WalkUpFromSwitchStmt(SwitchStmt *S) { 324*58fa50f4SIlya Biryukov Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, 325*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 326*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 327*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 328*58fa50f4SIlya Biryukov new (allocator()) syntax::SwitchStatement); 329*58fa50f4SIlya Biryukov return true; 330*58fa50f4SIlya Biryukov } 331*58fa50f4SIlya Biryukov 332*58fa50f4SIlya Biryukov bool WalkUpFromCaseStmt(CaseStmt *S) { 333*58fa50f4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, 334*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 335*58fa50f4SIlya Biryukov Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 336*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 337*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 338*58fa50f4SIlya Biryukov new (allocator()) syntax::CaseStatement); 339*58fa50f4SIlya Biryukov return true; 340*58fa50f4SIlya Biryukov } 341*58fa50f4SIlya Biryukov 342*58fa50f4SIlya Biryukov bool WalkUpFromDefaultStmt(DefaultStmt *S) { 343*58fa50f4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, 344*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 345*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 346*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 347*58fa50f4SIlya Biryukov new (allocator()) syntax::DefaultStatement); 348*58fa50f4SIlya Biryukov return true; 349*58fa50f4SIlya Biryukov } 350*58fa50f4SIlya Biryukov 351*58fa50f4SIlya Biryukov bool WalkUpFromIfStmt(IfStmt *S) { 352*58fa50f4SIlya Biryukov Builder.markChildToken(S->getIfLoc(), tok::kw_if, 353*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 354*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getThen(), 355*58fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_thenStatement); 356*58fa50f4SIlya Biryukov Builder.markChildToken(S->getElseLoc(), tok::kw_else, 357*58fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseKeyword); 358*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getElse(), 359*58fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseStatement); 360*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 361*58fa50f4SIlya Biryukov new (allocator()) syntax::IfStatement); 362*58fa50f4SIlya Biryukov return true; 363*58fa50f4SIlya Biryukov } 364*58fa50f4SIlya Biryukov 365*58fa50f4SIlya Biryukov bool WalkUpFromForStmt(ForStmt *S) { 366*58fa50f4SIlya Biryukov Builder.markChildToken(S->getForLoc(), tok::kw_for, 367*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 368*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 369*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 370*58fa50f4SIlya Biryukov new (allocator()) syntax::ForStatement); 371*58fa50f4SIlya Biryukov return true; 372*58fa50f4SIlya Biryukov } 373*58fa50f4SIlya Biryukov 374*58fa50f4SIlya Biryukov bool WalkUpFromWhileStmt(WhileStmt *S) { 375*58fa50f4SIlya Biryukov Builder.markChildToken(S->getWhileLoc(), tok::kw_while, 376*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 377*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 378*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 379*58fa50f4SIlya Biryukov new (allocator()) syntax::WhileStatement); 380*58fa50f4SIlya Biryukov return true; 381*58fa50f4SIlya Biryukov } 382*58fa50f4SIlya Biryukov 383*58fa50f4SIlya Biryukov bool WalkUpFromContinueStmt(ContinueStmt *S) { 384*58fa50f4SIlya Biryukov Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, 385*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 386*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 387*58fa50f4SIlya Biryukov new (allocator()) syntax::ContinueStatement); 388*58fa50f4SIlya Biryukov return true; 389*58fa50f4SIlya Biryukov } 390*58fa50f4SIlya Biryukov 391*58fa50f4SIlya Biryukov bool WalkUpFromBreakStmt(BreakStmt *S) { 392*58fa50f4SIlya Biryukov Builder.markChildToken(S->getBreakLoc(), tok::kw_break, 393*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 394*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 395*58fa50f4SIlya Biryukov new (allocator()) syntax::BreakStatement); 396*58fa50f4SIlya Biryukov return true; 397*58fa50f4SIlya Biryukov } 398*58fa50f4SIlya Biryukov 399*58fa50f4SIlya Biryukov bool WalkUpFromReturnStmt(ReturnStmt *S) { 400*58fa50f4SIlya Biryukov Builder.markChildToken(S->getReturnLoc(), tok::kw_return, 401*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 402*58fa50f4SIlya Biryukov Builder.markExprChild(S->getRetValue(), 403*58fa50f4SIlya Biryukov syntax::NodeRole::ReturnStatement_value); 404*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 405*58fa50f4SIlya Biryukov new (allocator()) syntax::ReturnStatement); 406*58fa50f4SIlya Biryukov return true; 407*58fa50f4SIlya Biryukov } 408*58fa50f4SIlya Biryukov 409*58fa50f4SIlya Biryukov bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 410*58fa50f4SIlya Biryukov Builder.markChildToken(S->getForLoc(), tok::kw_for, 411*58fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 412*58fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 413*58fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 414*58fa50f4SIlya Biryukov new (allocator()) syntax::RangeBasedForStatement); 415*58fa50f4SIlya Biryukov return true; 416*58fa50f4SIlya Biryukov } 417*58fa50f4SIlya Biryukov 4189b3f38f9SIlya Biryukov private: 4199b3f38f9SIlya Biryukov /// A small helper to save some typing. 4209b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 4219b3f38f9SIlya Biryukov 4229b3f38f9SIlya Biryukov syntax::TreeBuilder &Builder; 4239b3f38f9SIlya Biryukov const LangOptions &LangOpts; 4249b3f38f9SIlya Biryukov }; 4259b3f38f9SIlya Biryukov } // namespace 4269b3f38f9SIlya Biryukov 4279b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 4289b3f38f9SIlya Biryukov syntax::Tree *New) { 4299b3f38f9SIlya Biryukov Pending.foldChildren(Range, New); 4309b3f38f9SIlya Biryukov } 4319b3f38f9SIlya Biryukov 4329b3f38f9SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc, 4339b3f38f9SIlya Biryukov tok::TokenKind Kind, NodeRole Role) { 4349b3f38f9SIlya Biryukov if (Loc.isInvalid()) 4359b3f38f9SIlya Biryukov return; 4369b3f38f9SIlya Biryukov Pending.assignRole(*findToken(Loc), Role); 4379b3f38f9SIlya Biryukov } 4389b3f38f9SIlya Biryukov 439*58fa50f4SIlya Biryukov void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 440*58fa50f4SIlya Biryukov if (!Child) 441*58fa50f4SIlya Biryukov return; 442*58fa50f4SIlya Biryukov 443*58fa50f4SIlya Biryukov auto Range = getStmtRange(Child); 444*58fa50f4SIlya Biryukov // This is an expression in a statement position, consume the trailing 445*58fa50f4SIlya Biryukov // semicolon and form an 'ExpressionStatement' node. 446*58fa50f4SIlya Biryukov if (auto *E = dyn_cast<Expr>(Child)) { 447*58fa50f4SIlya Biryukov Pending.assignRole(getExprRange(E), 448*58fa50f4SIlya Biryukov NodeRole::ExpressionStatement_expression); 449*58fa50f4SIlya Biryukov // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 450*58fa50f4SIlya Biryukov Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); 451*58fa50f4SIlya Biryukov } 452*58fa50f4SIlya Biryukov Pending.assignRole(Range, Role); 453*58fa50f4SIlya Biryukov } 454*58fa50f4SIlya Biryukov 455*58fa50f4SIlya Biryukov void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 456*58fa50f4SIlya Biryukov Pending.assignRole(getExprRange(Child), Role); 457*58fa50f4SIlya Biryukov } 458*58fa50f4SIlya Biryukov 4599b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 4609b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 4619b3f38f9SIlya Biryukov auto &SM = Arena.sourceManager(); 4629b3f38f9SIlya Biryukov auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 4639b3f38f9SIlya Biryukov return SM.isBeforeInTranslationUnit(T.location(), L); 4649b3f38f9SIlya Biryukov }); 4659b3f38f9SIlya Biryukov assert(It != Tokens.end()); 4669b3f38f9SIlya Biryukov assert(It->location() == L); 4679b3f38f9SIlya Biryukov return &*It; 4689b3f38f9SIlya Biryukov } 4699b3f38f9SIlya Biryukov 4709b3f38f9SIlya Biryukov syntax::TranslationUnit * 4719b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 4729b3f38f9SIlya Biryukov TreeBuilder Builder(A); 4739b3f38f9SIlya Biryukov BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 4749b3f38f9SIlya Biryukov return std::move(Builder).finalize(); 4759b3f38f9SIlya Biryukov } 476