19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 29b3f38f9SIlya Biryukov // 39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information. 59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69b3f38f9SIlya Biryukov // 79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===// 89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h" 99b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h" 109b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h" 119b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h" 129b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h" 139b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h" 149b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h" 159b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h" 169b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h" 179b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h" 189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h" 199b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h" 209b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h" 219b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h" 229b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h" 239b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h" 2496065cf7SIlya Biryukov #include "llvm/Support/Compiler.h" 259b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h" 269b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h" 279b3f38f9SIlya Biryukov #include <map> 289b3f38f9SIlya Biryukov 299b3f38f9SIlya Biryukov using namespace clang; 309b3f38f9SIlya Biryukov 3196065cf7SIlya Biryukov LLVM_ATTRIBUTE_UNUSED 3258fa50f4SIlya Biryukov static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 3358fa50f4SIlya Biryukov 349b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang 359b3f38f9SIlya Biryukov /// AST. 369b3f38f9SIlya Biryukov /// 379b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes. 389b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST 399b3f38f9SIlya Biryukov /// node, the clients need to: 409b3f38f9SIlya Biryukov /// - create a corresponding syntax node, 419b3f38f9SIlya Biryukov /// - assign roles to all pending child nodes with 'markChild' and 429b3f38f9SIlya Biryukov /// 'markChildToken', 439b3f38f9SIlya Biryukov /// - replace the child nodes with the new syntax node in the pending list 449b3f38f9SIlya Biryukov /// with 'foldNode'. 459b3f38f9SIlya Biryukov /// 469b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node. 479b3f38f9SIlya Biryukov /// 489b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node. 499b3f38f9SIlya Biryukov class syntax::TreeBuilder { 509b3f38f9SIlya Biryukov public: 519b3f38f9SIlya Biryukov TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 529b3f38f9SIlya Biryukov 539b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 549b3f38f9SIlya Biryukov 559b3f38f9SIlya Biryukov /// Populate children for \p New node, assuming it covers tokens from \p 569b3f38f9SIlya Biryukov /// Range. 579b3f38f9SIlya Biryukov void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 589b3f38f9SIlya Biryukov 5958fa50f4SIlya Biryukov /// Mark the \p Child node with a corresponding \p Role. All marked children 6058fa50f4SIlya Biryukov /// should be consumed by foldNode. 6158fa50f4SIlya Biryukov /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 6258fa50f4SIlya Biryukov /// wraps expressions into expression statement. 6358fa50f4SIlya Biryukov void markStmtChild(Stmt *Child, NodeRole Role); 6458fa50f4SIlya Biryukov /// Should be called for expressions in non-statement position to avoid 6558fa50f4SIlya Biryukov /// wrapping into expression statement. 6658fa50f4SIlya Biryukov void markExprChild(Expr *Child, NodeRole Role); 6758fa50f4SIlya Biryukov 689b3f38f9SIlya Biryukov /// Set role for a token starting at \p Loc. 69*def65bb4SIlya Biryukov void markChildToken(SourceLocation Loc, NodeRole R); 709b3f38f9SIlya Biryukov 719b3f38f9SIlya Biryukov /// Finish building the tree and consume the root node. 729b3f38f9SIlya Biryukov syntax::TranslationUnit *finalize() && { 739b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 74bfbf6b6cSIlya Biryukov assert(!Tokens.empty()); 75bfbf6b6cSIlya Biryukov assert(Tokens.back().kind() == tok::eof); 76bfbf6b6cSIlya Biryukov 779b3f38f9SIlya Biryukov // Build the root of the tree, consuming all the children. 78bfbf6b6cSIlya Biryukov Pending.foldChildren(Tokens.drop_back(), 799b3f38f9SIlya Biryukov new (Arena.allocator()) syntax::TranslationUnit); 809b3f38f9SIlya Biryukov 819b3f38f9SIlya Biryukov return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 829b3f38f9SIlya Biryukov } 839b3f38f9SIlya Biryukov 849b3f38f9SIlya Biryukov /// getRange() finds the syntax tokens corresponding to the passed source 859b3f38f9SIlya Biryukov /// locations. 869b3f38f9SIlya Biryukov /// \p First is the start position of the first token and \p Last is the start 879b3f38f9SIlya Biryukov /// position of the last token. 889b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 899b3f38f9SIlya Biryukov SourceLocation Last) const { 909b3f38f9SIlya Biryukov assert(First.isValid()); 919b3f38f9SIlya Biryukov assert(Last.isValid()); 929b3f38f9SIlya Biryukov assert(First == Last || 939b3f38f9SIlya Biryukov Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 949b3f38f9SIlya Biryukov return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 959b3f38f9SIlya Biryukov } 969b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 979b3f38f9SIlya Biryukov return getRange(D->getBeginLoc(), D->getEndLoc()); 989b3f38f9SIlya Biryukov } 9958fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 10058fa50f4SIlya Biryukov return getRange(E->getBeginLoc(), E->getEndLoc()); 10158fa50f4SIlya Biryukov } 10258fa50f4SIlya Biryukov /// Find the adjusted range for the statement, consuming the trailing 10358fa50f4SIlya Biryukov /// semicolon when needed. 10458fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 10558fa50f4SIlya Biryukov auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 10658fa50f4SIlya Biryukov if (isa<CompoundStmt>(S)) 10758fa50f4SIlya Biryukov return Tokens; 10858fa50f4SIlya Biryukov 10958fa50f4SIlya Biryukov // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 11058fa50f4SIlya Biryukov // all statements that end with those. Consume this semicolon here. 11158fa50f4SIlya Biryukov // 11258fa50f4SIlya Biryukov // (!) statements never consume 'eof', so looking at the next token is ok. 11358fa50f4SIlya Biryukov if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 11458fa50f4SIlya Biryukov return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 11558fa50f4SIlya Biryukov return Tokens; 1169b3f38f9SIlya Biryukov } 1179b3f38f9SIlya Biryukov 1189b3f38f9SIlya Biryukov private: 1199b3f38f9SIlya Biryukov /// Finds a token starting at \p L. The token must exist. 1209b3f38f9SIlya Biryukov const syntax::Token *findToken(SourceLocation L) const; 1219b3f38f9SIlya Biryukov 1229b3f38f9SIlya Biryukov /// A collection of trees covering the input tokens. 1239b3f38f9SIlya Biryukov /// When created, each tree corresponds to a single token in the file. 1249b3f38f9SIlya Biryukov /// Clients call 'foldChildren' to attach one or more subtrees to a parent 1259b3f38f9SIlya Biryukov /// node and update the list of trees accordingly. 1269b3f38f9SIlya Biryukov /// 1279b3f38f9SIlya Biryukov /// Ensures that added nodes properly nest and cover the whole token stream. 1289b3f38f9SIlya Biryukov struct Forest { 1299b3f38f9SIlya Biryukov Forest(syntax::Arena &A) { 130bfbf6b6cSIlya Biryukov assert(!A.tokenBuffer().expandedTokens().empty()); 131bfbf6b6cSIlya Biryukov assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 1329b3f38f9SIlya Biryukov // Create all leaf nodes. 133bfbf6b6cSIlya Biryukov // Note that we do not have 'eof' in the tree. 134bfbf6b6cSIlya Biryukov for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 1359b3f38f9SIlya Biryukov Trees.insert(Trees.end(), 1369b3f38f9SIlya Biryukov {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 1379b3f38f9SIlya Biryukov } 1389b3f38f9SIlya Biryukov 1399b3f38f9SIlya Biryukov void assignRole(llvm::ArrayRef<syntax::Token> Range, 1409b3f38f9SIlya Biryukov syntax::NodeRole Role) { 1419b3f38f9SIlya Biryukov assert(!Range.empty()); 1429b3f38f9SIlya Biryukov auto It = Trees.lower_bound(Range.begin()); 1439b3f38f9SIlya Biryukov assert(It != Trees.end() && "no node found"); 1449b3f38f9SIlya Biryukov assert(It->first == Range.begin() && "no child with the specified range"); 1459b3f38f9SIlya Biryukov assert((std::next(It) == Trees.end() || 1469b3f38f9SIlya Biryukov std::next(It)->first == Range.end()) && 1479b3f38f9SIlya Biryukov "no child with the specified range"); 1489b3f38f9SIlya Biryukov It->second.Role = Role; 1499b3f38f9SIlya Biryukov } 1509b3f38f9SIlya Biryukov 1519b3f38f9SIlya Biryukov /// Add \p Node to the forest and fill its children nodes based on the \p 1529b3f38f9SIlya Biryukov /// NodeRange. 1539b3f38f9SIlya Biryukov void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens, 1549b3f38f9SIlya Biryukov syntax::Tree *Node) { 1559b3f38f9SIlya Biryukov assert(!NodeTokens.empty()); 1569b3f38f9SIlya Biryukov assert(Node->firstChild() == nullptr && "node already has children"); 1579b3f38f9SIlya Biryukov 1589b3f38f9SIlya Biryukov auto *FirstToken = NodeTokens.begin(); 1599b3f38f9SIlya Biryukov auto BeginChildren = Trees.lower_bound(FirstToken); 1609b3f38f9SIlya Biryukov assert(BeginChildren != Trees.end() && 1619b3f38f9SIlya Biryukov BeginChildren->first == FirstToken && 1629b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1639b3f38f9SIlya Biryukov auto EndChildren = Trees.lower_bound(NodeTokens.end()); 1649b3f38f9SIlya Biryukov assert((EndChildren == Trees.end() || 1659b3f38f9SIlya Biryukov EndChildren->first == NodeTokens.end()) && 1669b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1679b3f38f9SIlya Biryukov 1689b3f38f9SIlya Biryukov // (!) we need to go in reverse order, because we can only prepend. 1699b3f38f9SIlya Biryukov for (auto It = EndChildren; It != BeginChildren; --It) 1709b3f38f9SIlya Biryukov Node->prependChildLowLevel(std::prev(It)->second.Node, 1719b3f38f9SIlya Biryukov std::prev(It)->second.Role); 1729b3f38f9SIlya Biryukov 1739b3f38f9SIlya Biryukov Trees.erase(BeginChildren, EndChildren); 1749b3f38f9SIlya Biryukov Trees.insert({FirstToken, NodeAndRole(Node)}); 1759b3f38f9SIlya Biryukov } 1769b3f38f9SIlya Biryukov 1779b3f38f9SIlya Biryukov // EXPECTS: all tokens were consumed and are owned by a single root node. 1789b3f38f9SIlya Biryukov syntax::Node *finalize() && { 1799b3f38f9SIlya Biryukov assert(Trees.size() == 1); 1809b3f38f9SIlya Biryukov auto *Root = Trees.begin()->second.Node; 1819b3f38f9SIlya Biryukov Trees = {}; 1829b3f38f9SIlya Biryukov return Root; 1839b3f38f9SIlya Biryukov } 1849b3f38f9SIlya Biryukov 1859b3f38f9SIlya Biryukov std::string str(const syntax::Arena &A) const { 1869b3f38f9SIlya Biryukov std::string R; 1879b3f38f9SIlya Biryukov for (auto It = Trees.begin(); It != Trees.end(); ++It) { 1889b3f38f9SIlya Biryukov unsigned CoveredTokens = 1899b3f38f9SIlya Biryukov It != Trees.end() 1909b3f38f9SIlya Biryukov ? (std::next(It)->first - It->first) 1919b3f38f9SIlya Biryukov : A.tokenBuffer().expandedTokens().end() - It->first; 1929b3f38f9SIlya Biryukov 1939b3f38f9SIlya Biryukov R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 1949b3f38f9SIlya Biryukov It->second.Node->kind(), 1959b3f38f9SIlya Biryukov It->first->text(A.sourceManager()), CoveredTokens); 1969b3f38f9SIlya Biryukov R += It->second.Node->dump(A); 1979b3f38f9SIlya Biryukov } 1989b3f38f9SIlya Biryukov return R; 1999b3f38f9SIlya Biryukov } 2009b3f38f9SIlya Biryukov 2019b3f38f9SIlya Biryukov private: 2029b3f38f9SIlya Biryukov /// A with a role that should be assigned to it when adding to a parent. 2039b3f38f9SIlya Biryukov struct NodeAndRole { 2049b3f38f9SIlya Biryukov explicit NodeAndRole(syntax::Node *Node) 20551dad419SIlya Biryukov : Node(Node), Role(NodeRole::Unknown) {} 2069b3f38f9SIlya Biryukov 2079b3f38f9SIlya Biryukov syntax::Node *Node; 2089b3f38f9SIlya Biryukov NodeRole Role; 2099b3f38f9SIlya Biryukov }; 2109b3f38f9SIlya Biryukov 2119b3f38f9SIlya Biryukov /// Maps from the start token to a subtree starting at that token. 2129b3f38f9SIlya Biryukov /// FIXME: storing the end tokens is redundant. 2139b3f38f9SIlya Biryukov /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 2149b3f38f9SIlya Biryukov std::map<const syntax::Token *, NodeAndRole> Trees; 2159b3f38f9SIlya Biryukov }; 2169b3f38f9SIlya Biryukov 2179b3f38f9SIlya Biryukov /// For debugging purposes. 2189b3f38f9SIlya Biryukov std::string str() { return Pending.str(Arena); } 2199b3f38f9SIlya Biryukov 2209b3f38f9SIlya Biryukov syntax::Arena &Arena; 2219b3f38f9SIlya Biryukov Forest Pending; 2229b3f38f9SIlya Biryukov }; 2239b3f38f9SIlya Biryukov 2249b3f38f9SIlya Biryukov namespace { 2259b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 2269b3f38f9SIlya Biryukov public: 2279b3f38f9SIlya Biryukov explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 2289b3f38f9SIlya Biryukov : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 2299b3f38f9SIlya Biryukov 2309b3f38f9SIlya Biryukov bool shouldTraversePostOrder() const { return true; } 2319b3f38f9SIlya Biryukov 2329b3f38f9SIlya Biryukov bool TraverseDecl(Decl *D) { 2339b3f38f9SIlya Biryukov if (!D || isa<TranslationUnitDecl>(D)) 2349b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2359b3f38f9SIlya Biryukov if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext())) 2369b3f38f9SIlya Biryukov return true; // Only build top-level decls for now, do not recurse. 2379b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2389b3f38f9SIlya Biryukov } 2399b3f38f9SIlya Biryukov 2409b3f38f9SIlya Biryukov bool VisitDecl(Decl *D) { 2419b3f38f9SIlya Biryukov assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) && 2429b3f38f9SIlya Biryukov "expected a top-level decl"); 2439b3f38f9SIlya Biryukov assert(!D->isImplicit()); 2449b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(D), 2459b3f38f9SIlya Biryukov new (allocator()) syntax::TopLevelDeclaration()); 2469b3f38f9SIlya Biryukov return true; 2479b3f38f9SIlya Biryukov } 2489b3f38f9SIlya Biryukov 2499b3f38f9SIlya Biryukov bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 2509b3f38f9SIlya Biryukov // (!) we do not want to call VisitDecl(), the declaration for translation 2519b3f38f9SIlya Biryukov // unit is built by finalize(). 2529b3f38f9SIlya Biryukov return true; 2539b3f38f9SIlya Biryukov } 2549b3f38f9SIlya Biryukov 2559b3f38f9SIlya Biryukov bool WalkUpFromCompoundStmt(CompoundStmt *S) { 25651dad419SIlya Biryukov using NodeRole = syntax::NodeRole; 2579b3f38f9SIlya Biryukov 258*def65bb4SIlya Biryukov Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 25958fa50f4SIlya Biryukov for (auto *Child : S->body()) 26058fa50f4SIlya Biryukov Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 261*def65bb4SIlya Biryukov Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 2629b3f38f9SIlya Biryukov 26358fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 2649b3f38f9SIlya Biryukov new (allocator()) syntax::CompoundStatement); 2659b3f38f9SIlya Biryukov return true; 2669b3f38f9SIlya Biryukov } 2679b3f38f9SIlya Biryukov 26858fa50f4SIlya Biryukov // Some statements are not yet handled by syntax trees. 26958fa50f4SIlya Biryukov bool WalkUpFromStmt(Stmt *S) { 27058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 27158fa50f4SIlya Biryukov new (allocator()) syntax::UnknownStatement); 27258fa50f4SIlya Biryukov return true; 27358fa50f4SIlya Biryukov } 27458fa50f4SIlya Biryukov 27558fa50f4SIlya Biryukov bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 27658fa50f4SIlya Biryukov // We override to traverse range initializer as VarDecl. 27758fa50f4SIlya Biryukov // RAV traverses it as a statement, we produce invalid node kinds in that 27858fa50f4SIlya Biryukov // case. 27958fa50f4SIlya Biryukov // FIXME: should do this in RAV instead? 28058fa50f4SIlya Biryukov if (S->getInit() && !TraverseStmt(S->getInit())) 28158fa50f4SIlya Biryukov return false; 28258fa50f4SIlya Biryukov if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 28358fa50f4SIlya Biryukov return false; 28458fa50f4SIlya Biryukov if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 28558fa50f4SIlya Biryukov return false; 28658fa50f4SIlya Biryukov if (S->getBody() && !TraverseStmt(S->getBody())) 28758fa50f4SIlya Biryukov return false; 28858fa50f4SIlya Biryukov return true; 28958fa50f4SIlya Biryukov } 29058fa50f4SIlya Biryukov 29158fa50f4SIlya Biryukov bool TraverseStmt(Stmt *S) { 29258fa50f4SIlya Biryukov if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 29358fa50f4SIlya Biryukov // (!) do not recurse into subexpressions. 29458fa50f4SIlya Biryukov // we do not have syntax trees for expressions yet, so we only want to see 29558fa50f4SIlya Biryukov // the first top-level expression. 29658fa50f4SIlya Biryukov return WalkUpFromExpr(E->IgnoreImplicit()); 29758fa50f4SIlya Biryukov } 29858fa50f4SIlya Biryukov return RecursiveASTVisitor::TraverseStmt(S); 29958fa50f4SIlya Biryukov } 30058fa50f4SIlya Biryukov 30158fa50f4SIlya Biryukov // Some expressions are not yet handled by syntax trees. 30258fa50f4SIlya Biryukov bool WalkUpFromExpr(Expr *E) { 30358fa50f4SIlya Biryukov assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 30458fa50f4SIlya Biryukov Builder.foldNode(Builder.getExprRange(E), 30558fa50f4SIlya Biryukov new (allocator()) syntax::UnknownExpression); 30658fa50f4SIlya Biryukov return true; 30758fa50f4SIlya Biryukov } 30858fa50f4SIlya Biryukov 30958fa50f4SIlya Biryukov // The code below is very regular, it could even be generated with some 31058fa50f4SIlya Biryukov // preprocessor magic. We merely assign roles to the corresponding children 31158fa50f4SIlya Biryukov // and fold resulting nodes. 31258fa50f4SIlya Biryukov bool WalkUpFromDeclStmt(DeclStmt *S) { 31358fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 31458fa50f4SIlya Biryukov new (allocator()) syntax::DeclarationStatement); 31558fa50f4SIlya Biryukov return true; 31658fa50f4SIlya Biryukov } 31758fa50f4SIlya Biryukov 31858fa50f4SIlya Biryukov bool WalkUpFromNullStmt(NullStmt *S) { 31958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 32058fa50f4SIlya Biryukov new (allocator()) syntax::EmptyStatement); 32158fa50f4SIlya Biryukov return true; 32258fa50f4SIlya Biryukov } 32358fa50f4SIlya Biryukov 32458fa50f4SIlya Biryukov bool WalkUpFromSwitchStmt(SwitchStmt *S) { 325*def65bb4SIlya Biryukov Builder.markChildToken(S->getSwitchLoc(), 32658fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 32758fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 32858fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 32958fa50f4SIlya Biryukov new (allocator()) syntax::SwitchStatement); 33058fa50f4SIlya Biryukov return true; 33158fa50f4SIlya Biryukov } 33258fa50f4SIlya Biryukov 33358fa50f4SIlya Biryukov bool WalkUpFromCaseStmt(CaseStmt *S) { 334*def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 33558fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 33658fa50f4SIlya Biryukov Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 33758fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 33858fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 33958fa50f4SIlya Biryukov new (allocator()) syntax::CaseStatement); 34058fa50f4SIlya Biryukov return true; 34158fa50f4SIlya Biryukov } 34258fa50f4SIlya Biryukov 34358fa50f4SIlya Biryukov bool WalkUpFromDefaultStmt(DefaultStmt *S) { 344*def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 34558fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 34658fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 34758fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 34858fa50f4SIlya Biryukov new (allocator()) syntax::DefaultStatement); 34958fa50f4SIlya Biryukov return true; 35058fa50f4SIlya Biryukov } 35158fa50f4SIlya Biryukov 35258fa50f4SIlya Biryukov bool WalkUpFromIfStmt(IfStmt *S) { 353*def65bb4SIlya Biryukov Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 35458fa50f4SIlya Biryukov Builder.markStmtChild(S->getThen(), 35558fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_thenStatement); 356*def65bb4SIlya Biryukov Builder.markChildToken(S->getElseLoc(), 35758fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseKeyword); 35858fa50f4SIlya Biryukov Builder.markStmtChild(S->getElse(), 35958fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseStatement); 36058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 36158fa50f4SIlya Biryukov new (allocator()) syntax::IfStatement); 36258fa50f4SIlya Biryukov return true; 36358fa50f4SIlya Biryukov } 36458fa50f4SIlya Biryukov 36558fa50f4SIlya Biryukov bool WalkUpFromForStmt(ForStmt *S) { 366*def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 36758fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 36858fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 36958fa50f4SIlya Biryukov new (allocator()) syntax::ForStatement); 37058fa50f4SIlya Biryukov return true; 37158fa50f4SIlya Biryukov } 37258fa50f4SIlya Biryukov 37358fa50f4SIlya Biryukov bool WalkUpFromWhileStmt(WhileStmt *S) { 374*def65bb4SIlya Biryukov Builder.markChildToken(S->getWhileLoc(), 37558fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 37658fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 37758fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 37858fa50f4SIlya Biryukov new (allocator()) syntax::WhileStatement); 37958fa50f4SIlya Biryukov return true; 38058fa50f4SIlya Biryukov } 38158fa50f4SIlya Biryukov 38258fa50f4SIlya Biryukov bool WalkUpFromContinueStmt(ContinueStmt *S) { 383*def65bb4SIlya Biryukov Builder.markChildToken(S->getContinueLoc(), 38458fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 38558fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 38658fa50f4SIlya Biryukov new (allocator()) syntax::ContinueStatement); 38758fa50f4SIlya Biryukov return true; 38858fa50f4SIlya Biryukov } 38958fa50f4SIlya Biryukov 39058fa50f4SIlya Biryukov bool WalkUpFromBreakStmt(BreakStmt *S) { 391*def65bb4SIlya Biryukov Builder.markChildToken(S->getBreakLoc(), 39258fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 39358fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 39458fa50f4SIlya Biryukov new (allocator()) syntax::BreakStatement); 39558fa50f4SIlya Biryukov return true; 39658fa50f4SIlya Biryukov } 39758fa50f4SIlya Biryukov 39858fa50f4SIlya Biryukov bool WalkUpFromReturnStmt(ReturnStmt *S) { 399*def65bb4SIlya Biryukov Builder.markChildToken(S->getReturnLoc(), 40058fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 40158fa50f4SIlya Biryukov Builder.markExprChild(S->getRetValue(), 40258fa50f4SIlya Biryukov syntax::NodeRole::ReturnStatement_value); 40358fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 40458fa50f4SIlya Biryukov new (allocator()) syntax::ReturnStatement); 40558fa50f4SIlya Biryukov return true; 40658fa50f4SIlya Biryukov } 40758fa50f4SIlya Biryukov 40858fa50f4SIlya Biryukov bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 409*def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 41058fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 41158fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 41258fa50f4SIlya Biryukov new (allocator()) syntax::RangeBasedForStatement); 41358fa50f4SIlya Biryukov return true; 41458fa50f4SIlya Biryukov } 41558fa50f4SIlya Biryukov 4169b3f38f9SIlya Biryukov private: 4179b3f38f9SIlya Biryukov /// A small helper to save some typing. 4189b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 4199b3f38f9SIlya Biryukov 4209b3f38f9SIlya Biryukov syntax::TreeBuilder &Builder; 4219b3f38f9SIlya Biryukov const LangOptions &LangOpts; 4229b3f38f9SIlya Biryukov }; 4239b3f38f9SIlya Biryukov } // namespace 4249b3f38f9SIlya Biryukov 4259b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 4269b3f38f9SIlya Biryukov syntax::Tree *New) { 4279b3f38f9SIlya Biryukov Pending.foldChildren(Range, New); 4289b3f38f9SIlya Biryukov } 4299b3f38f9SIlya Biryukov 430*def65bb4SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 4319b3f38f9SIlya Biryukov if (Loc.isInvalid()) 4329b3f38f9SIlya Biryukov return; 4339b3f38f9SIlya Biryukov Pending.assignRole(*findToken(Loc), Role); 4349b3f38f9SIlya Biryukov } 4359b3f38f9SIlya Biryukov 43658fa50f4SIlya Biryukov void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 43758fa50f4SIlya Biryukov if (!Child) 43858fa50f4SIlya Biryukov return; 43958fa50f4SIlya Biryukov 44058fa50f4SIlya Biryukov auto Range = getStmtRange(Child); 44158fa50f4SIlya Biryukov // This is an expression in a statement position, consume the trailing 44258fa50f4SIlya Biryukov // semicolon and form an 'ExpressionStatement' node. 44358fa50f4SIlya Biryukov if (auto *E = dyn_cast<Expr>(Child)) { 44458fa50f4SIlya Biryukov Pending.assignRole(getExprRange(E), 44558fa50f4SIlya Biryukov NodeRole::ExpressionStatement_expression); 44658fa50f4SIlya Biryukov // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 44758fa50f4SIlya Biryukov Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); 44858fa50f4SIlya Biryukov } 44958fa50f4SIlya Biryukov Pending.assignRole(Range, Role); 45058fa50f4SIlya Biryukov } 45158fa50f4SIlya Biryukov 45258fa50f4SIlya Biryukov void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 45358fa50f4SIlya Biryukov Pending.assignRole(getExprRange(Child), Role); 45458fa50f4SIlya Biryukov } 45558fa50f4SIlya Biryukov 4569b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 4579b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 4589b3f38f9SIlya Biryukov auto &SM = Arena.sourceManager(); 4599b3f38f9SIlya Biryukov auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 4609b3f38f9SIlya Biryukov return SM.isBeforeInTranslationUnit(T.location(), L); 4619b3f38f9SIlya Biryukov }); 4629b3f38f9SIlya Biryukov assert(It != Tokens.end()); 4639b3f38f9SIlya Biryukov assert(It->location() == L); 4649b3f38f9SIlya Biryukov return &*It; 4659b3f38f9SIlya Biryukov } 4669b3f38f9SIlya Biryukov 4679b3f38f9SIlya Biryukov syntax::TranslationUnit * 4689b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 4699b3f38f9SIlya Biryukov TreeBuilder Builder(A); 4709b3f38f9SIlya Biryukov BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 4719b3f38f9SIlya Biryukov return std::move(Builder).finalize(); 4729b3f38f9SIlya Biryukov } 473