19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 29b3f38f9SIlya Biryukov // 39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information. 59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69b3f38f9SIlya Biryukov // 79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===// 89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h" 99b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h" 109b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h" 119b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h" 129b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h" 139b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h" 149b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h" 159b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h" 169b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h" 179b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h" 189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h" 199b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h" 209b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h" 219b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h" 229b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h" 239b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h" 2496065cf7SIlya Biryukov #include "llvm/Support/Compiler.h" 259b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h" 269b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h" 279b3f38f9SIlya Biryukov #include <map> 289b3f38f9SIlya Biryukov 299b3f38f9SIlya Biryukov using namespace clang; 309b3f38f9SIlya Biryukov 3196065cf7SIlya Biryukov LLVM_ATTRIBUTE_UNUSED 3258fa50f4SIlya Biryukov static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 3358fa50f4SIlya Biryukov 349b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang 359b3f38f9SIlya Biryukov /// AST. 369b3f38f9SIlya Biryukov /// 379b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes. 389b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST 399b3f38f9SIlya Biryukov /// node, the clients need to: 409b3f38f9SIlya Biryukov /// - create a corresponding syntax node, 419b3f38f9SIlya Biryukov /// - assign roles to all pending child nodes with 'markChild' and 429b3f38f9SIlya Biryukov /// 'markChildToken', 439b3f38f9SIlya Biryukov /// - replace the child nodes with the new syntax node in the pending list 449b3f38f9SIlya Biryukov /// with 'foldNode'. 459b3f38f9SIlya Biryukov /// 469b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node. 479b3f38f9SIlya Biryukov /// 489b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node. 499b3f38f9SIlya Biryukov class syntax::TreeBuilder { 509b3f38f9SIlya Biryukov public: 519b3f38f9SIlya Biryukov TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 529b3f38f9SIlya Biryukov 539b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 549b3f38f9SIlya Biryukov 559b3f38f9SIlya Biryukov /// Populate children for \p New node, assuming it covers tokens from \p 569b3f38f9SIlya Biryukov /// Range. 579b3f38f9SIlya Biryukov void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 589b3f38f9SIlya Biryukov 5958fa50f4SIlya Biryukov /// Mark the \p Child node with a corresponding \p Role. All marked children 6058fa50f4SIlya Biryukov /// should be consumed by foldNode. 6158fa50f4SIlya Biryukov /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 6258fa50f4SIlya Biryukov /// wraps expressions into expression statement. 6358fa50f4SIlya Biryukov void markStmtChild(Stmt *Child, NodeRole Role); 6458fa50f4SIlya Biryukov /// Should be called for expressions in non-statement position to avoid 6558fa50f4SIlya Biryukov /// wrapping into expression statement. 6658fa50f4SIlya Biryukov void markExprChild(Expr *Child, NodeRole Role); 6758fa50f4SIlya Biryukov 689b3f38f9SIlya Biryukov /// Set role for a token starting at \p Loc. 69def65bb4SIlya Biryukov void markChildToken(SourceLocation Loc, NodeRole R); 709b3f38f9SIlya Biryukov 719b3f38f9SIlya Biryukov /// Finish building the tree and consume the root node. 729b3f38f9SIlya Biryukov syntax::TranslationUnit *finalize() && { 739b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 74bfbf6b6cSIlya Biryukov assert(!Tokens.empty()); 75bfbf6b6cSIlya Biryukov assert(Tokens.back().kind() == tok::eof); 76bfbf6b6cSIlya Biryukov 779b3f38f9SIlya Biryukov // Build the root of the tree, consuming all the children. 78bfbf6b6cSIlya Biryukov Pending.foldChildren(Tokens.drop_back(), 799b3f38f9SIlya Biryukov new (Arena.allocator()) syntax::TranslationUnit); 809b3f38f9SIlya Biryukov 819b3f38f9SIlya Biryukov return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 829b3f38f9SIlya Biryukov } 839b3f38f9SIlya Biryukov 849b3f38f9SIlya Biryukov /// getRange() finds the syntax tokens corresponding to the passed source 859b3f38f9SIlya Biryukov /// locations. 869b3f38f9SIlya Biryukov /// \p First is the start position of the first token and \p Last is the start 879b3f38f9SIlya Biryukov /// position of the last token. 889b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 899b3f38f9SIlya Biryukov SourceLocation Last) const { 909b3f38f9SIlya Biryukov assert(First.isValid()); 919b3f38f9SIlya Biryukov assert(Last.isValid()); 929b3f38f9SIlya Biryukov assert(First == Last || 939b3f38f9SIlya Biryukov Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 949b3f38f9SIlya Biryukov return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 959b3f38f9SIlya Biryukov } 969b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 979b3f38f9SIlya Biryukov return getRange(D->getBeginLoc(), D->getEndLoc()); 989b3f38f9SIlya Biryukov } 9958fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 10058fa50f4SIlya Biryukov return getRange(E->getBeginLoc(), E->getEndLoc()); 10158fa50f4SIlya Biryukov } 10258fa50f4SIlya Biryukov /// Find the adjusted range for the statement, consuming the trailing 10358fa50f4SIlya Biryukov /// semicolon when needed. 10458fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 10558fa50f4SIlya Biryukov auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 10658fa50f4SIlya Biryukov if (isa<CompoundStmt>(S)) 10758fa50f4SIlya Biryukov return Tokens; 10858fa50f4SIlya Biryukov 10958fa50f4SIlya Biryukov // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 11058fa50f4SIlya Biryukov // all statements that end with those. Consume this semicolon here. 11158fa50f4SIlya Biryukov // 11258fa50f4SIlya Biryukov // (!) statements never consume 'eof', so looking at the next token is ok. 11358fa50f4SIlya Biryukov if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 11458fa50f4SIlya Biryukov return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 11558fa50f4SIlya Biryukov return Tokens; 1169b3f38f9SIlya Biryukov } 1179b3f38f9SIlya Biryukov 1189b3f38f9SIlya Biryukov private: 1199b3f38f9SIlya Biryukov /// Finds a token starting at \p L. The token must exist. 1209b3f38f9SIlya Biryukov const syntax::Token *findToken(SourceLocation L) const; 1219b3f38f9SIlya Biryukov 1229b3f38f9SIlya Biryukov /// A collection of trees covering the input tokens. 1239b3f38f9SIlya Biryukov /// When created, each tree corresponds to a single token in the file. 1249b3f38f9SIlya Biryukov /// Clients call 'foldChildren' to attach one or more subtrees to a parent 1259b3f38f9SIlya Biryukov /// node and update the list of trees accordingly. 1269b3f38f9SIlya Biryukov /// 1279b3f38f9SIlya Biryukov /// Ensures that added nodes properly nest and cover the whole token stream. 1289b3f38f9SIlya Biryukov struct Forest { 1299b3f38f9SIlya Biryukov Forest(syntax::Arena &A) { 130bfbf6b6cSIlya Biryukov assert(!A.tokenBuffer().expandedTokens().empty()); 131bfbf6b6cSIlya Biryukov assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 1329b3f38f9SIlya Biryukov // Create all leaf nodes. 133bfbf6b6cSIlya Biryukov // Note that we do not have 'eof' in the tree. 134bfbf6b6cSIlya Biryukov for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 1359b3f38f9SIlya Biryukov Trees.insert(Trees.end(), 1369b3f38f9SIlya Biryukov {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 1379b3f38f9SIlya Biryukov } 1389b3f38f9SIlya Biryukov 1399b3f38f9SIlya Biryukov void assignRole(llvm::ArrayRef<syntax::Token> Range, 1409b3f38f9SIlya Biryukov syntax::NodeRole Role) { 1419b3f38f9SIlya Biryukov assert(!Range.empty()); 1429b3f38f9SIlya Biryukov auto It = Trees.lower_bound(Range.begin()); 1439b3f38f9SIlya Biryukov assert(It != Trees.end() && "no node found"); 1449b3f38f9SIlya Biryukov assert(It->first == Range.begin() && "no child with the specified range"); 1459b3f38f9SIlya Biryukov assert((std::next(It) == Trees.end() || 1469b3f38f9SIlya Biryukov std::next(It)->first == Range.end()) && 1479b3f38f9SIlya Biryukov "no child with the specified range"); 1489b3f38f9SIlya Biryukov It->second.Role = Role; 1499b3f38f9SIlya Biryukov } 1509b3f38f9SIlya Biryukov 1519b3f38f9SIlya Biryukov /// Add \p Node to the forest and fill its children nodes based on the \p 1529b3f38f9SIlya Biryukov /// NodeRange. 1539b3f38f9SIlya Biryukov void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens, 1549b3f38f9SIlya Biryukov syntax::Tree *Node) { 1559b3f38f9SIlya Biryukov assert(!NodeTokens.empty()); 1569b3f38f9SIlya Biryukov assert(Node->firstChild() == nullptr && "node already has children"); 1579b3f38f9SIlya Biryukov 1589b3f38f9SIlya Biryukov auto *FirstToken = NodeTokens.begin(); 1599b3f38f9SIlya Biryukov auto BeginChildren = Trees.lower_bound(FirstToken); 1609b3f38f9SIlya Biryukov assert(BeginChildren != Trees.end() && 1619b3f38f9SIlya Biryukov BeginChildren->first == FirstToken && 1629b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1639b3f38f9SIlya Biryukov auto EndChildren = Trees.lower_bound(NodeTokens.end()); 1649b3f38f9SIlya Biryukov assert((EndChildren == Trees.end() || 1659b3f38f9SIlya Biryukov EndChildren->first == NodeTokens.end()) && 1669b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1679b3f38f9SIlya Biryukov 1689b3f38f9SIlya Biryukov // (!) we need to go in reverse order, because we can only prepend. 1699b3f38f9SIlya Biryukov for (auto It = EndChildren; It != BeginChildren; --It) 1709b3f38f9SIlya Biryukov Node->prependChildLowLevel(std::prev(It)->second.Node, 1719b3f38f9SIlya Biryukov std::prev(It)->second.Role); 1729b3f38f9SIlya Biryukov 1739b3f38f9SIlya Biryukov Trees.erase(BeginChildren, EndChildren); 1749b3f38f9SIlya Biryukov Trees.insert({FirstToken, NodeAndRole(Node)}); 1759b3f38f9SIlya Biryukov } 1769b3f38f9SIlya Biryukov 1779b3f38f9SIlya Biryukov // EXPECTS: all tokens were consumed and are owned by a single root node. 1789b3f38f9SIlya Biryukov syntax::Node *finalize() && { 1799b3f38f9SIlya Biryukov assert(Trees.size() == 1); 1809b3f38f9SIlya Biryukov auto *Root = Trees.begin()->second.Node; 1819b3f38f9SIlya Biryukov Trees = {}; 1829b3f38f9SIlya Biryukov return Root; 1839b3f38f9SIlya Biryukov } 1849b3f38f9SIlya Biryukov 1859b3f38f9SIlya Biryukov std::string str(const syntax::Arena &A) const { 1869b3f38f9SIlya Biryukov std::string R; 1879b3f38f9SIlya Biryukov for (auto It = Trees.begin(); It != Trees.end(); ++It) { 1889b3f38f9SIlya Biryukov unsigned CoveredTokens = 1899b3f38f9SIlya Biryukov It != Trees.end() 1909b3f38f9SIlya Biryukov ? (std::next(It)->first - It->first) 1919b3f38f9SIlya Biryukov : A.tokenBuffer().expandedTokens().end() - It->first; 1929b3f38f9SIlya Biryukov 1939b3f38f9SIlya Biryukov R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 1949b3f38f9SIlya Biryukov It->second.Node->kind(), 1959b3f38f9SIlya Biryukov It->first->text(A.sourceManager()), CoveredTokens); 1969b3f38f9SIlya Biryukov R += It->second.Node->dump(A); 1979b3f38f9SIlya Biryukov } 1989b3f38f9SIlya Biryukov return R; 1999b3f38f9SIlya Biryukov } 2009b3f38f9SIlya Biryukov 2019b3f38f9SIlya Biryukov private: 2029b3f38f9SIlya Biryukov /// A with a role that should be assigned to it when adding to a parent. 2039b3f38f9SIlya Biryukov struct NodeAndRole { 2049b3f38f9SIlya Biryukov explicit NodeAndRole(syntax::Node *Node) 20551dad419SIlya Biryukov : Node(Node), Role(NodeRole::Unknown) {} 2069b3f38f9SIlya Biryukov 2079b3f38f9SIlya Biryukov syntax::Node *Node; 2089b3f38f9SIlya Biryukov NodeRole Role; 2099b3f38f9SIlya Biryukov }; 2109b3f38f9SIlya Biryukov 2119b3f38f9SIlya Biryukov /// Maps from the start token to a subtree starting at that token. 212*302cb3bcSIlya Biryukov /// Keys in the map are pointers into the array of expanded tokens, so 213*302cb3bcSIlya Biryukov /// pointer order corresponds to the order of preprocessor tokens. 2149b3f38f9SIlya Biryukov /// FIXME: storing the end tokens is redundant. 2159b3f38f9SIlya Biryukov /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 2169b3f38f9SIlya Biryukov std::map<const syntax::Token *, NodeAndRole> Trees; 2179b3f38f9SIlya Biryukov }; 2189b3f38f9SIlya Biryukov 2199b3f38f9SIlya Biryukov /// For debugging purposes. 2209b3f38f9SIlya Biryukov std::string str() { return Pending.str(Arena); } 2219b3f38f9SIlya Biryukov 2229b3f38f9SIlya Biryukov syntax::Arena &Arena; 2239b3f38f9SIlya Biryukov Forest Pending; 2249b3f38f9SIlya Biryukov }; 2259b3f38f9SIlya Biryukov 2269b3f38f9SIlya Biryukov namespace { 2279b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 2289b3f38f9SIlya Biryukov public: 2299b3f38f9SIlya Biryukov explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 2309b3f38f9SIlya Biryukov : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 2319b3f38f9SIlya Biryukov 2329b3f38f9SIlya Biryukov bool shouldTraversePostOrder() const { return true; } 2339b3f38f9SIlya Biryukov 2349b3f38f9SIlya Biryukov bool TraverseDecl(Decl *D) { 2359b3f38f9SIlya Biryukov if (!D || isa<TranslationUnitDecl>(D)) 2369b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2379b3f38f9SIlya Biryukov if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext())) 2389b3f38f9SIlya Biryukov return true; // Only build top-level decls for now, do not recurse. 2399b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2409b3f38f9SIlya Biryukov } 2419b3f38f9SIlya Biryukov 2429b3f38f9SIlya Biryukov bool VisitDecl(Decl *D) { 2439b3f38f9SIlya Biryukov assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) && 2449b3f38f9SIlya Biryukov "expected a top-level decl"); 2459b3f38f9SIlya Biryukov assert(!D->isImplicit()); 2469b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(D), 2479b3f38f9SIlya Biryukov new (allocator()) syntax::TopLevelDeclaration()); 2489b3f38f9SIlya Biryukov return true; 2499b3f38f9SIlya Biryukov } 2509b3f38f9SIlya Biryukov 2519b3f38f9SIlya Biryukov bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 2529b3f38f9SIlya Biryukov // (!) we do not want to call VisitDecl(), the declaration for translation 2539b3f38f9SIlya Biryukov // unit is built by finalize(). 2549b3f38f9SIlya Biryukov return true; 2559b3f38f9SIlya Biryukov } 2569b3f38f9SIlya Biryukov 2579b3f38f9SIlya Biryukov bool WalkUpFromCompoundStmt(CompoundStmt *S) { 25851dad419SIlya Biryukov using NodeRole = syntax::NodeRole; 2599b3f38f9SIlya Biryukov 260def65bb4SIlya Biryukov Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 26158fa50f4SIlya Biryukov for (auto *Child : S->body()) 26258fa50f4SIlya Biryukov Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 263def65bb4SIlya Biryukov Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 2649b3f38f9SIlya Biryukov 26558fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 2669b3f38f9SIlya Biryukov new (allocator()) syntax::CompoundStatement); 2679b3f38f9SIlya Biryukov return true; 2689b3f38f9SIlya Biryukov } 2699b3f38f9SIlya Biryukov 27058fa50f4SIlya Biryukov // Some statements are not yet handled by syntax trees. 27158fa50f4SIlya Biryukov bool WalkUpFromStmt(Stmt *S) { 27258fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 27358fa50f4SIlya Biryukov new (allocator()) syntax::UnknownStatement); 27458fa50f4SIlya Biryukov return true; 27558fa50f4SIlya Biryukov } 27658fa50f4SIlya Biryukov 27758fa50f4SIlya Biryukov bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 27858fa50f4SIlya Biryukov // We override to traverse range initializer as VarDecl. 27958fa50f4SIlya Biryukov // RAV traverses it as a statement, we produce invalid node kinds in that 28058fa50f4SIlya Biryukov // case. 28158fa50f4SIlya Biryukov // FIXME: should do this in RAV instead? 28258fa50f4SIlya Biryukov if (S->getInit() && !TraverseStmt(S->getInit())) 28358fa50f4SIlya Biryukov return false; 28458fa50f4SIlya Biryukov if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 28558fa50f4SIlya Biryukov return false; 28658fa50f4SIlya Biryukov if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 28758fa50f4SIlya Biryukov return false; 28858fa50f4SIlya Biryukov if (S->getBody() && !TraverseStmt(S->getBody())) 28958fa50f4SIlya Biryukov return false; 29058fa50f4SIlya Biryukov return true; 29158fa50f4SIlya Biryukov } 29258fa50f4SIlya Biryukov 29358fa50f4SIlya Biryukov bool TraverseStmt(Stmt *S) { 29458fa50f4SIlya Biryukov if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 29558fa50f4SIlya Biryukov // (!) do not recurse into subexpressions. 29658fa50f4SIlya Biryukov // we do not have syntax trees for expressions yet, so we only want to see 29758fa50f4SIlya Biryukov // the first top-level expression. 29858fa50f4SIlya Biryukov return WalkUpFromExpr(E->IgnoreImplicit()); 29958fa50f4SIlya Biryukov } 30058fa50f4SIlya Biryukov return RecursiveASTVisitor::TraverseStmt(S); 30158fa50f4SIlya Biryukov } 30258fa50f4SIlya Biryukov 30358fa50f4SIlya Biryukov // Some expressions are not yet handled by syntax trees. 30458fa50f4SIlya Biryukov bool WalkUpFromExpr(Expr *E) { 30558fa50f4SIlya Biryukov assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 30658fa50f4SIlya Biryukov Builder.foldNode(Builder.getExprRange(E), 30758fa50f4SIlya Biryukov new (allocator()) syntax::UnknownExpression); 30858fa50f4SIlya Biryukov return true; 30958fa50f4SIlya Biryukov } 31058fa50f4SIlya Biryukov 31158fa50f4SIlya Biryukov // The code below is very regular, it could even be generated with some 31258fa50f4SIlya Biryukov // preprocessor magic. We merely assign roles to the corresponding children 31358fa50f4SIlya Biryukov // and fold resulting nodes. 31458fa50f4SIlya Biryukov bool WalkUpFromDeclStmt(DeclStmt *S) { 31558fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 31658fa50f4SIlya Biryukov new (allocator()) syntax::DeclarationStatement); 31758fa50f4SIlya Biryukov return true; 31858fa50f4SIlya Biryukov } 31958fa50f4SIlya Biryukov 32058fa50f4SIlya Biryukov bool WalkUpFromNullStmt(NullStmt *S) { 32158fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 32258fa50f4SIlya Biryukov new (allocator()) syntax::EmptyStatement); 32358fa50f4SIlya Biryukov return true; 32458fa50f4SIlya Biryukov } 32558fa50f4SIlya Biryukov 32658fa50f4SIlya Biryukov bool WalkUpFromSwitchStmt(SwitchStmt *S) { 327def65bb4SIlya Biryukov Builder.markChildToken(S->getSwitchLoc(), 32858fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 32958fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 33058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 33158fa50f4SIlya Biryukov new (allocator()) syntax::SwitchStatement); 33258fa50f4SIlya Biryukov return true; 33358fa50f4SIlya Biryukov } 33458fa50f4SIlya Biryukov 33558fa50f4SIlya Biryukov bool WalkUpFromCaseStmt(CaseStmt *S) { 336def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 33758fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 33858fa50f4SIlya Biryukov Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 33958fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 34058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 34158fa50f4SIlya Biryukov new (allocator()) syntax::CaseStatement); 34258fa50f4SIlya Biryukov return true; 34358fa50f4SIlya Biryukov } 34458fa50f4SIlya Biryukov 34558fa50f4SIlya Biryukov bool WalkUpFromDefaultStmt(DefaultStmt *S) { 346def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 34758fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 34858fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 34958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 35058fa50f4SIlya Biryukov new (allocator()) syntax::DefaultStatement); 35158fa50f4SIlya Biryukov return true; 35258fa50f4SIlya Biryukov } 35358fa50f4SIlya Biryukov 35458fa50f4SIlya Biryukov bool WalkUpFromIfStmt(IfStmt *S) { 355def65bb4SIlya Biryukov Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 35658fa50f4SIlya Biryukov Builder.markStmtChild(S->getThen(), 35758fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_thenStatement); 358def65bb4SIlya Biryukov Builder.markChildToken(S->getElseLoc(), 35958fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseKeyword); 36058fa50f4SIlya Biryukov Builder.markStmtChild(S->getElse(), 36158fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseStatement); 36258fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 36358fa50f4SIlya Biryukov new (allocator()) syntax::IfStatement); 36458fa50f4SIlya Biryukov return true; 36558fa50f4SIlya Biryukov } 36658fa50f4SIlya Biryukov 36758fa50f4SIlya Biryukov bool WalkUpFromForStmt(ForStmt *S) { 368def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 36958fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 37058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 37158fa50f4SIlya Biryukov new (allocator()) syntax::ForStatement); 37258fa50f4SIlya Biryukov return true; 37358fa50f4SIlya Biryukov } 37458fa50f4SIlya Biryukov 37558fa50f4SIlya Biryukov bool WalkUpFromWhileStmt(WhileStmt *S) { 376def65bb4SIlya Biryukov Builder.markChildToken(S->getWhileLoc(), 37758fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 37858fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 37958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 38058fa50f4SIlya Biryukov new (allocator()) syntax::WhileStatement); 38158fa50f4SIlya Biryukov return true; 38258fa50f4SIlya Biryukov } 38358fa50f4SIlya Biryukov 38458fa50f4SIlya Biryukov bool WalkUpFromContinueStmt(ContinueStmt *S) { 385def65bb4SIlya Biryukov Builder.markChildToken(S->getContinueLoc(), 38658fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 38758fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 38858fa50f4SIlya Biryukov new (allocator()) syntax::ContinueStatement); 38958fa50f4SIlya Biryukov return true; 39058fa50f4SIlya Biryukov } 39158fa50f4SIlya Biryukov 39258fa50f4SIlya Biryukov bool WalkUpFromBreakStmt(BreakStmt *S) { 393def65bb4SIlya Biryukov Builder.markChildToken(S->getBreakLoc(), 39458fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 39558fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 39658fa50f4SIlya Biryukov new (allocator()) syntax::BreakStatement); 39758fa50f4SIlya Biryukov return true; 39858fa50f4SIlya Biryukov } 39958fa50f4SIlya Biryukov 40058fa50f4SIlya Biryukov bool WalkUpFromReturnStmt(ReturnStmt *S) { 401def65bb4SIlya Biryukov Builder.markChildToken(S->getReturnLoc(), 40258fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 40358fa50f4SIlya Biryukov Builder.markExprChild(S->getRetValue(), 40458fa50f4SIlya Biryukov syntax::NodeRole::ReturnStatement_value); 40558fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 40658fa50f4SIlya Biryukov new (allocator()) syntax::ReturnStatement); 40758fa50f4SIlya Biryukov return true; 40858fa50f4SIlya Biryukov } 40958fa50f4SIlya Biryukov 41058fa50f4SIlya Biryukov bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 411def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 41258fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 41358fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 41458fa50f4SIlya Biryukov new (allocator()) syntax::RangeBasedForStatement); 41558fa50f4SIlya Biryukov return true; 41658fa50f4SIlya Biryukov } 41758fa50f4SIlya Biryukov 4189b3f38f9SIlya Biryukov private: 4199b3f38f9SIlya Biryukov /// A small helper to save some typing. 4209b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 4219b3f38f9SIlya Biryukov 4229b3f38f9SIlya Biryukov syntax::TreeBuilder &Builder; 4239b3f38f9SIlya Biryukov const LangOptions &LangOpts; 4249b3f38f9SIlya Biryukov }; 4259b3f38f9SIlya Biryukov } // namespace 4269b3f38f9SIlya Biryukov 4279b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 4289b3f38f9SIlya Biryukov syntax::Tree *New) { 4299b3f38f9SIlya Biryukov Pending.foldChildren(Range, New); 4309b3f38f9SIlya Biryukov } 4319b3f38f9SIlya Biryukov 432def65bb4SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 4339b3f38f9SIlya Biryukov if (Loc.isInvalid()) 4349b3f38f9SIlya Biryukov return; 4359b3f38f9SIlya Biryukov Pending.assignRole(*findToken(Loc), Role); 4369b3f38f9SIlya Biryukov } 4379b3f38f9SIlya Biryukov 43858fa50f4SIlya Biryukov void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 43958fa50f4SIlya Biryukov if (!Child) 44058fa50f4SIlya Biryukov return; 44158fa50f4SIlya Biryukov 44258fa50f4SIlya Biryukov auto Range = getStmtRange(Child); 44358fa50f4SIlya Biryukov // This is an expression in a statement position, consume the trailing 44458fa50f4SIlya Biryukov // semicolon and form an 'ExpressionStatement' node. 44558fa50f4SIlya Biryukov if (auto *E = dyn_cast<Expr>(Child)) { 44658fa50f4SIlya Biryukov Pending.assignRole(getExprRange(E), 44758fa50f4SIlya Biryukov NodeRole::ExpressionStatement_expression); 44858fa50f4SIlya Biryukov // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 44958fa50f4SIlya Biryukov Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); 45058fa50f4SIlya Biryukov } 45158fa50f4SIlya Biryukov Pending.assignRole(Range, Role); 45258fa50f4SIlya Biryukov } 45358fa50f4SIlya Biryukov 45458fa50f4SIlya Biryukov void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 45558fa50f4SIlya Biryukov Pending.assignRole(getExprRange(Child), Role); 45658fa50f4SIlya Biryukov } 45758fa50f4SIlya Biryukov 4589b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 4599b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 4609b3f38f9SIlya Biryukov auto &SM = Arena.sourceManager(); 4619b3f38f9SIlya Biryukov auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 4629b3f38f9SIlya Biryukov return SM.isBeforeInTranslationUnit(T.location(), L); 4639b3f38f9SIlya Biryukov }); 4649b3f38f9SIlya Biryukov assert(It != Tokens.end()); 4659b3f38f9SIlya Biryukov assert(It->location() == L); 4669b3f38f9SIlya Biryukov return &*It; 4679b3f38f9SIlya Biryukov } 4689b3f38f9SIlya Biryukov 4699b3f38f9SIlya Biryukov syntax::TranslationUnit * 4709b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 4719b3f38f9SIlya Biryukov TreeBuilder Builder(A); 4729b3f38f9SIlya Biryukov BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 4739b3f38f9SIlya Biryukov return std::move(Builder).finalize(); 4749b3f38f9SIlya Biryukov } 475