19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 29b3f38f9SIlya Biryukov // 39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information. 59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69b3f38f9SIlya Biryukov // 79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===// 89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h" 99b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h" 109b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h" 119b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h" 129b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h" 139b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h" 149b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h" 159b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h" 169b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h" 179b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h" 189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h" 199b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h" 209b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h" 219b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h" 229b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h" 239b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h" 249b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h" 259b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h" 269b3f38f9SIlya Biryukov #include <map> 279b3f38f9SIlya Biryukov 289b3f38f9SIlya Biryukov using namespace clang; 299b3f38f9SIlya Biryukov 309b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang 319b3f38f9SIlya Biryukov /// AST. 329b3f38f9SIlya Biryukov /// 339b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes. 349b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST 359b3f38f9SIlya Biryukov /// node, the clients need to: 369b3f38f9SIlya Biryukov /// - create a corresponding syntax node, 379b3f38f9SIlya Biryukov /// - assign roles to all pending child nodes with 'markChild' and 389b3f38f9SIlya Biryukov /// 'markChildToken', 399b3f38f9SIlya Biryukov /// - replace the child nodes with the new syntax node in the pending list 409b3f38f9SIlya Biryukov /// with 'foldNode'. 419b3f38f9SIlya Biryukov /// 429b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node. 439b3f38f9SIlya Biryukov /// 449b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node. 459b3f38f9SIlya Biryukov class syntax::TreeBuilder { 469b3f38f9SIlya Biryukov public: 479b3f38f9SIlya Biryukov TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 489b3f38f9SIlya Biryukov 499b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 509b3f38f9SIlya Biryukov 519b3f38f9SIlya Biryukov /// Populate children for \p New node, assuming it covers tokens from \p 529b3f38f9SIlya Biryukov /// Range. 539b3f38f9SIlya Biryukov void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 549b3f38f9SIlya Biryukov 559b3f38f9SIlya Biryukov /// Set role for a token starting at \p Loc. 569b3f38f9SIlya Biryukov void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); 579b3f38f9SIlya Biryukov 589b3f38f9SIlya Biryukov /// Finish building the tree and consume the root node. 599b3f38f9SIlya Biryukov syntax::TranslationUnit *finalize() && { 609b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 61*bfbf6b6cSIlya Biryukov assert(!Tokens.empty()); 62*bfbf6b6cSIlya Biryukov assert(Tokens.back().kind() == tok::eof); 63*bfbf6b6cSIlya Biryukov 649b3f38f9SIlya Biryukov // Build the root of the tree, consuming all the children. 65*bfbf6b6cSIlya Biryukov Pending.foldChildren(Tokens.drop_back(), 669b3f38f9SIlya Biryukov new (Arena.allocator()) syntax::TranslationUnit); 679b3f38f9SIlya Biryukov 689b3f38f9SIlya Biryukov return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 699b3f38f9SIlya Biryukov } 709b3f38f9SIlya Biryukov 719b3f38f9SIlya Biryukov /// getRange() finds the syntax tokens corresponding to the passed source 729b3f38f9SIlya Biryukov /// locations. 739b3f38f9SIlya Biryukov /// \p First is the start position of the first token and \p Last is the start 749b3f38f9SIlya Biryukov /// position of the last token. 759b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 769b3f38f9SIlya Biryukov SourceLocation Last) const { 779b3f38f9SIlya Biryukov assert(First.isValid()); 789b3f38f9SIlya Biryukov assert(Last.isValid()); 799b3f38f9SIlya Biryukov assert(First == Last || 809b3f38f9SIlya Biryukov Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 819b3f38f9SIlya Biryukov return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 829b3f38f9SIlya Biryukov } 839b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 849b3f38f9SIlya Biryukov return getRange(D->getBeginLoc(), D->getEndLoc()); 859b3f38f9SIlya Biryukov } 869b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const { 879b3f38f9SIlya Biryukov return getRange(S->getBeginLoc(), S->getEndLoc()); 889b3f38f9SIlya Biryukov } 899b3f38f9SIlya Biryukov 909b3f38f9SIlya Biryukov private: 919b3f38f9SIlya Biryukov /// Finds a token starting at \p L. The token must exist. 929b3f38f9SIlya Biryukov const syntax::Token *findToken(SourceLocation L) const; 939b3f38f9SIlya Biryukov 949b3f38f9SIlya Biryukov /// A collection of trees covering the input tokens. 959b3f38f9SIlya Biryukov /// When created, each tree corresponds to a single token in the file. 969b3f38f9SIlya Biryukov /// Clients call 'foldChildren' to attach one or more subtrees to a parent 979b3f38f9SIlya Biryukov /// node and update the list of trees accordingly. 989b3f38f9SIlya Biryukov /// 999b3f38f9SIlya Biryukov /// Ensures that added nodes properly nest and cover the whole token stream. 1009b3f38f9SIlya Biryukov struct Forest { 1019b3f38f9SIlya Biryukov Forest(syntax::Arena &A) { 102*bfbf6b6cSIlya Biryukov assert(!A.tokenBuffer().expandedTokens().empty()); 103*bfbf6b6cSIlya Biryukov assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 1049b3f38f9SIlya Biryukov // Create all leaf nodes. 105*bfbf6b6cSIlya Biryukov // Note that we do not have 'eof' in the tree. 106*bfbf6b6cSIlya Biryukov for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 1079b3f38f9SIlya Biryukov Trees.insert(Trees.end(), 1089b3f38f9SIlya Biryukov {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 1099b3f38f9SIlya Biryukov } 1109b3f38f9SIlya Biryukov 1119b3f38f9SIlya Biryukov void assignRole(llvm::ArrayRef<syntax::Token> Range, 1129b3f38f9SIlya Biryukov syntax::NodeRole Role) { 1139b3f38f9SIlya Biryukov assert(!Range.empty()); 1149b3f38f9SIlya Biryukov auto It = Trees.lower_bound(Range.begin()); 1159b3f38f9SIlya Biryukov assert(It != Trees.end() && "no node found"); 1169b3f38f9SIlya Biryukov assert(It->first == Range.begin() && "no child with the specified range"); 1179b3f38f9SIlya Biryukov assert((std::next(It) == Trees.end() || 1189b3f38f9SIlya Biryukov std::next(It)->first == Range.end()) && 1199b3f38f9SIlya Biryukov "no child with the specified range"); 1209b3f38f9SIlya Biryukov It->second.Role = Role; 1219b3f38f9SIlya Biryukov } 1229b3f38f9SIlya Biryukov 1239b3f38f9SIlya Biryukov /// Add \p Node to the forest and fill its children nodes based on the \p 1249b3f38f9SIlya Biryukov /// NodeRange. 1259b3f38f9SIlya Biryukov void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens, 1269b3f38f9SIlya Biryukov syntax::Tree *Node) { 1279b3f38f9SIlya Biryukov assert(!NodeTokens.empty()); 1289b3f38f9SIlya Biryukov assert(Node->firstChild() == nullptr && "node already has children"); 1299b3f38f9SIlya Biryukov 1309b3f38f9SIlya Biryukov auto *FirstToken = NodeTokens.begin(); 1319b3f38f9SIlya Biryukov auto BeginChildren = Trees.lower_bound(FirstToken); 1329b3f38f9SIlya Biryukov assert(BeginChildren != Trees.end() && 1339b3f38f9SIlya Biryukov BeginChildren->first == FirstToken && 1349b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1359b3f38f9SIlya Biryukov auto EndChildren = Trees.lower_bound(NodeTokens.end()); 1369b3f38f9SIlya Biryukov assert((EndChildren == Trees.end() || 1379b3f38f9SIlya Biryukov EndChildren->first == NodeTokens.end()) && 1389b3f38f9SIlya Biryukov "fold crosses boundaries of existing subtrees"); 1399b3f38f9SIlya Biryukov 1409b3f38f9SIlya Biryukov // (!) we need to go in reverse order, because we can only prepend. 1419b3f38f9SIlya Biryukov for (auto It = EndChildren; It != BeginChildren; --It) 1429b3f38f9SIlya Biryukov Node->prependChildLowLevel(std::prev(It)->second.Node, 1439b3f38f9SIlya Biryukov std::prev(It)->second.Role); 1449b3f38f9SIlya Biryukov 1459b3f38f9SIlya Biryukov Trees.erase(BeginChildren, EndChildren); 1469b3f38f9SIlya Biryukov Trees.insert({FirstToken, NodeAndRole(Node)}); 1479b3f38f9SIlya Biryukov } 1489b3f38f9SIlya Biryukov 1499b3f38f9SIlya Biryukov // EXPECTS: all tokens were consumed and are owned by a single root node. 1509b3f38f9SIlya Biryukov syntax::Node *finalize() && { 1519b3f38f9SIlya Biryukov assert(Trees.size() == 1); 1529b3f38f9SIlya Biryukov auto *Root = Trees.begin()->second.Node; 1539b3f38f9SIlya Biryukov Trees = {}; 1549b3f38f9SIlya Biryukov return Root; 1559b3f38f9SIlya Biryukov } 1569b3f38f9SIlya Biryukov 1579b3f38f9SIlya Biryukov std::string str(const syntax::Arena &A) const { 1589b3f38f9SIlya Biryukov std::string R; 1599b3f38f9SIlya Biryukov for (auto It = Trees.begin(); It != Trees.end(); ++It) { 1609b3f38f9SIlya Biryukov unsigned CoveredTokens = 1619b3f38f9SIlya Biryukov It != Trees.end() 1629b3f38f9SIlya Biryukov ? (std::next(It)->first - It->first) 1639b3f38f9SIlya Biryukov : A.tokenBuffer().expandedTokens().end() - It->first; 1649b3f38f9SIlya Biryukov 1659b3f38f9SIlya Biryukov R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 1669b3f38f9SIlya Biryukov It->second.Node->kind(), 1679b3f38f9SIlya Biryukov It->first->text(A.sourceManager()), CoveredTokens); 1689b3f38f9SIlya Biryukov R += It->second.Node->dump(A); 1699b3f38f9SIlya Biryukov } 1709b3f38f9SIlya Biryukov return R; 1719b3f38f9SIlya Biryukov } 1729b3f38f9SIlya Biryukov 1739b3f38f9SIlya Biryukov private: 1749b3f38f9SIlya Biryukov /// A with a role that should be assigned to it when adding to a parent. 1759b3f38f9SIlya Biryukov struct NodeAndRole { 1769b3f38f9SIlya Biryukov explicit NodeAndRole(syntax::Node *Node) 17751dad419SIlya Biryukov : Node(Node), Role(NodeRole::Unknown) {} 1789b3f38f9SIlya Biryukov 1799b3f38f9SIlya Biryukov syntax::Node *Node; 1809b3f38f9SIlya Biryukov NodeRole Role; 1819b3f38f9SIlya Biryukov }; 1829b3f38f9SIlya Biryukov 1839b3f38f9SIlya Biryukov /// Maps from the start token to a subtree starting at that token. 1849b3f38f9SIlya Biryukov /// FIXME: storing the end tokens is redundant. 1859b3f38f9SIlya Biryukov /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 1869b3f38f9SIlya Biryukov std::map<const syntax::Token *, NodeAndRole> Trees; 1879b3f38f9SIlya Biryukov }; 1889b3f38f9SIlya Biryukov 1899b3f38f9SIlya Biryukov /// For debugging purposes. 1909b3f38f9SIlya Biryukov std::string str() { return Pending.str(Arena); } 1919b3f38f9SIlya Biryukov 1929b3f38f9SIlya Biryukov syntax::Arena &Arena; 1939b3f38f9SIlya Biryukov Forest Pending; 1949b3f38f9SIlya Biryukov }; 1959b3f38f9SIlya Biryukov 1969b3f38f9SIlya Biryukov namespace { 1979b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 1989b3f38f9SIlya Biryukov public: 1999b3f38f9SIlya Biryukov explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 2009b3f38f9SIlya Biryukov : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 2019b3f38f9SIlya Biryukov 2029b3f38f9SIlya Biryukov bool shouldTraversePostOrder() const { return true; } 2039b3f38f9SIlya Biryukov 2049b3f38f9SIlya Biryukov bool TraverseDecl(Decl *D) { 2059b3f38f9SIlya Biryukov if (!D || isa<TranslationUnitDecl>(D)) 2069b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2079b3f38f9SIlya Biryukov if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext())) 2089b3f38f9SIlya Biryukov return true; // Only build top-level decls for now, do not recurse. 2099b3f38f9SIlya Biryukov return RecursiveASTVisitor::TraverseDecl(D); 2109b3f38f9SIlya Biryukov } 2119b3f38f9SIlya Biryukov 2129b3f38f9SIlya Biryukov bool VisitDecl(Decl *D) { 2139b3f38f9SIlya Biryukov assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) && 2149b3f38f9SIlya Biryukov "expected a top-level decl"); 2159b3f38f9SIlya Biryukov assert(!D->isImplicit()); 2169b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(D), 2179b3f38f9SIlya Biryukov new (allocator()) syntax::TopLevelDeclaration()); 2189b3f38f9SIlya Biryukov return true; 2199b3f38f9SIlya Biryukov } 2209b3f38f9SIlya Biryukov 2219b3f38f9SIlya Biryukov bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 2229b3f38f9SIlya Biryukov // (!) we do not want to call VisitDecl(), the declaration for translation 2239b3f38f9SIlya Biryukov // unit is built by finalize(). 2249b3f38f9SIlya Biryukov return true; 2259b3f38f9SIlya Biryukov } 2269b3f38f9SIlya Biryukov 2279b3f38f9SIlya Biryukov bool WalkUpFromCompoundStmt(CompoundStmt *S) { 22851dad419SIlya Biryukov using NodeRole = syntax::NodeRole; 2299b3f38f9SIlya Biryukov 23051dad419SIlya Biryukov Builder.markChildToken(S->getLBracLoc(), tok::l_brace, 23151dad419SIlya Biryukov NodeRole::CompoundStatement_lbrace); 23251dad419SIlya Biryukov Builder.markChildToken(S->getRBracLoc(), tok::r_brace, 23351dad419SIlya Biryukov NodeRole::CompoundStatement_rbrace); 2349b3f38f9SIlya Biryukov 2359b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(S), 2369b3f38f9SIlya Biryukov new (allocator()) syntax::CompoundStatement); 2379b3f38f9SIlya Biryukov return true; 2389b3f38f9SIlya Biryukov } 2399b3f38f9SIlya Biryukov 2409b3f38f9SIlya Biryukov private: 2419b3f38f9SIlya Biryukov /// A small helper to save some typing. 2429b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 2439b3f38f9SIlya Biryukov 2449b3f38f9SIlya Biryukov syntax::TreeBuilder &Builder; 2459b3f38f9SIlya Biryukov const LangOptions &LangOpts; 2469b3f38f9SIlya Biryukov }; 2479b3f38f9SIlya Biryukov } // namespace 2489b3f38f9SIlya Biryukov 2499b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 2509b3f38f9SIlya Biryukov syntax::Tree *New) { 2519b3f38f9SIlya Biryukov Pending.foldChildren(Range, New); 2529b3f38f9SIlya Biryukov } 2539b3f38f9SIlya Biryukov 2549b3f38f9SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc, 2559b3f38f9SIlya Biryukov tok::TokenKind Kind, NodeRole Role) { 2569b3f38f9SIlya Biryukov if (Loc.isInvalid()) 2579b3f38f9SIlya Biryukov return; 2589b3f38f9SIlya Biryukov Pending.assignRole(*findToken(Loc), Role); 2599b3f38f9SIlya Biryukov } 2609b3f38f9SIlya Biryukov 2619b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 2629b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 2639b3f38f9SIlya Biryukov auto &SM = Arena.sourceManager(); 2649b3f38f9SIlya Biryukov auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 2659b3f38f9SIlya Biryukov return SM.isBeforeInTranslationUnit(T.location(), L); 2669b3f38f9SIlya Biryukov }); 2679b3f38f9SIlya Biryukov assert(It != Tokens.end()); 2689b3f38f9SIlya Biryukov assert(It->location() == L); 2699b3f38f9SIlya Biryukov return &*It; 2709b3f38f9SIlya Biryukov } 2719b3f38f9SIlya Biryukov 2729b3f38f9SIlya Biryukov syntax::TranslationUnit * 2739b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 2749b3f38f9SIlya Biryukov TreeBuilder Builder(A); 2759b3f38f9SIlya Biryukov BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 2769b3f38f9SIlya Biryukov return std::move(Builder).finalize(); 2779b3f38f9SIlya Biryukov } 278