19b3f38f9SIlya Biryukov //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 29b3f38f9SIlya Biryukov // 39b3f38f9SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49b3f38f9SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information. 59b3f38f9SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69b3f38f9SIlya Biryukov // 79b3f38f9SIlya Biryukov //===----------------------------------------------------------------------===// 89b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/BuildTree.h" 9*e702bdb8SIlya Biryukov #include "clang/AST/Decl.h" 10*e702bdb8SIlya Biryukov #include "clang/AST/DeclBase.h" 119b3f38f9SIlya Biryukov #include "clang/AST/RecursiveASTVisitor.h" 129b3f38f9SIlya Biryukov #include "clang/AST/Stmt.h" 139b3f38f9SIlya Biryukov #include "clang/Basic/LLVM.h" 149b3f38f9SIlya Biryukov #include "clang/Basic/SourceLocation.h" 159b3f38f9SIlya Biryukov #include "clang/Basic/SourceManager.h" 169b3f38f9SIlya Biryukov #include "clang/Basic/TokenKinds.h" 179b3f38f9SIlya Biryukov #include "clang/Lex/Lexer.h" 189b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Nodes.h" 199b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h" 209b3f38f9SIlya Biryukov #include "clang/Tooling/Syntax/Tree.h" 219b3f38f9SIlya Biryukov #include "llvm/ADT/ArrayRef.h" 229b3f38f9SIlya Biryukov #include "llvm/ADT/STLExtras.h" 239b3f38f9SIlya Biryukov #include "llvm/ADT/SmallVector.h" 249b3f38f9SIlya Biryukov #include "llvm/Support/Allocator.h" 259b3f38f9SIlya Biryukov #include "llvm/Support/Casting.h" 2696065cf7SIlya Biryukov #include "llvm/Support/Compiler.h" 279b3f38f9SIlya Biryukov #include "llvm/Support/FormatVariadic.h" 289b3f38f9SIlya Biryukov #include "llvm/Support/raw_ostream.h" 299b3f38f9SIlya Biryukov #include <map> 309b3f38f9SIlya Biryukov 319b3f38f9SIlya Biryukov using namespace clang; 329b3f38f9SIlya Biryukov 3396065cf7SIlya Biryukov LLVM_ATTRIBUTE_UNUSED 3458fa50f4SIlya Biryukov static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 3558fa50f4SIlya Biryukov 369b3f38f9SIlya Biryukov /// A helper class for constructing the syntax tree while traversing a clang 379b3f38f9SIlya Biryukov /// AST. 389b3f38f9SIlya Biryukov /// 399b3f38f9SIlya Biryukov /// At each point of the traversal we maintain a list of pending nodes. 409b3f38f9SIlya Biryukov /// Initially all tokens are added as pending nodes. When processing a clang AST 419b3f38f9SIlya Biryukov /// node, the clients need to: 429b3f38f9SIlya Biryukov /// - create a corresponding syntax node, 439b3f38f9SIlya Biryukov /// - assign roles to all pending child nodes with 'markChild' and 449b3f38f9SIlya Biryukov /// 'markChildToken', 459b3f38f9SIlya Biryukov /// - replace the child nodes with the new syntax node in the pending list 469b3f38f9SIlya Biryukov /// with 'foldNode'. 479b3f38f9SIlya Biryukov /// 489b3f38f9SIlya Biryukov /// Note that all children are expected to be processed when building a node. 499b3f38f9SIlya Biryukov /// 509b3f38f9SIlya Biryukov /// Call finalize() to finish building the tree and consume the root node. 519b3f38f9SIlya Biryukov class syntax::TreeBuilder { 529b3f38f9SIlya Biryukov public: 539b3f38f9SIlya Biryukov TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 549b3f38f9SIlya Biryukov 559b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 569b3f38f9SIlya Biryukov 579b3f38f9SIlya Biryukov /// Populate children for \p New node, assuming it covers tokens from \p 589b3f38f9SIlya Biryukov /// Range. 599b3f38f9SIlya Biryukov void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 609b3f38f9SIlya Biryukov 61*e702bdb8SIlya Biryukov /// Must be called with the range of each `DeclaratorDecl`. Ensures the 62*e702bdb8SIlya Biryukov /// corresponding declarator nodes are covered by `SimpleDeclaration`. 63*e702bdb8SIlya Biryukov void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); 64*e702bdb8SIlya Biryukov 65*e702bdb8SIlya Biryukov /// Notifies that we should not consume trailing semicolon when computing 66*e702bdb8SIlya Biryukov /// token range of \p D. 67*e702bdb8SIlya Biryukov void noticeDeclaratorWithoutSemicolon(Decl *D); 68*e702bdb8SIlya Biryukov 6958fa50f4SIlya Biryukov /// Mark the \p Child node with a corresponding \p Role. All marked children 7058fa50f4SIlya Biryukov /// should be consumed by foldNode. 7158fa50f4SIlya Biryukov /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 7258fa50f4SIlya Biryukov /// wraps expressions into expression statement. 7358fa50f4SIlya Biryukov void markStmtChild(Stmt *Child, NodeRole Role); 7458fa50f4SIlya Biryukov /// Should be called for expressions in non-statement position to avoid 7558fa50f4SIlya Biryukov /// wrapping into expression statement. 7658fa50f4SIlya Biryukov void markExprChild(Expr *Child, NodeRole Role); 7758fa50f4SIlya Biryukov 789b3f38f9SIlya Biryukov /// Set role for a token starting at \p Loc. 79def65bb4SIlya Biryukov void markChildToken(SourceLocation Loc, NodeRole R); 809b3f38f9SIlya Biryukov 819b3f38f9SIlya Biryukov /// Finish building the tree and consume the root node. 829b3f38f9SIlya Biryukov syntax::TranslationUnit *finalize() && { 839b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 84bfbf6b6cSIlya Biryukov assert(!Tokens.empty()); 85bfbf6b6cSIlya Biryukov assert(Tokens.back().kind() == tok::eof); 86bfbf6b6cSIlya Biryukov 879b3f38f9SIlya Biryukov // Build the root of the tree, consuming all the children. 88bfbf6b6cSIlya Biryukov Pending.foldChildren(Tokens.drop_back(), 899b3f38f9SIlya Biryukov new (Arena.allocator()) syntax::TranslationUnit); 909b3f38f9SIlya Biryukov 919b3f38f9SIlya Biryukov return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 929b3f38f9SIlya Biryukov } 939b3f38f9SIlya Biryukov 949b3f38f9SIlya Biryukov /// getRange() finds the syntax tokens corresponding to the passed source 959b3f38f9SIlya Biryukov /// locations. 969b3f38f9SIlya Biryukov /// \p First is the start position of the first token and \p Last is the start 979b3f38f9SIlya Biryukov /// position of the last token. 989b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 999b3f38f9SIlya Biryukov SourceLocation Last) const { 1009b3f38f9SIlya Biryukov assert(First.isValid()); 1019b3f38f9SIlya Biryukov assert(Last.isValid()); 1029b3f38f9SIlya Biryukov assert(First == Last || 1039b3f38f9SIlya Biryukov Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 1049b3f38f9SIlya Biryukov return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 1059b3f38f9SIlya Biryukov } 1069b3f38f9SIlya Biryukov llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 107*e702bdb8SIlya Biryukov auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); 108*e702bdb8SIlya Biryukov if (llvm::isa<NamespaceDecl>(D)) 109*e702bdb8SIlya Biryukov return Tokens; 110*e702bdb8SIlya Biryukov if (DeclsWithoutSemicolons.count(D)) 111*e702bdb8SIlya Biryukov return Tokens; 112*e702bdb8SIlya Biryukov // FIXME: do not consume trailing semicolon on function definitions. 113*e702bdb8SIlya Biryukov // Most declarations own a semicolon in syntax trees, but not in clang AST. 114*e702bdb8SIlya Biryukov return withTrailingSemicolon(Tokens); 1159b3f38f9SIlya Biryukov } 11658fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 11758fa50f4SIlya Biryukov return getRange(E->getBeginLoc(), E->getEndLoc()); 11858fa50f4SIlya Biryukov } 11958fa50f4SIlya Biryukov /// Find the adjusted range for the statement, consuming the trailing 12058fa50f4SIlya Biryukov /// semicolon when needed. 12158fa50f4SIlya Biryukov llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 12258fa50f4SIlya Biryukov auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 12358fa50f4SIlya Biryukov if (isa<CompoundStmt>(S)) 12458fa50f4SIlya Biryukov return Tokens; 12558fa50f4SIlya Biryukov 12658fa50f4SIlya Biryukov // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 12758fa50f4SIlya Biryukov // all statements that end with those. Consume this semicolon here. 128*e702bdb8SIlya Biryukov if (Tokens.back().kind() == tok::semi) 129*e702bdb8SIlya Biryukov return Tokens; 130*e702bdb8SIlya Biryukov return withTrailingSemicolon(Tokens); 131*e702bdb8SIlya Biryukov } 132*e702bdb8SIlya Biryukov 133*e702bdb8SIlya Biryukov private: 134*e702bdb8SIlya Biryukov llvm::ArrayRef<syntax::Token> 135*e702bdb8SIlya Biryukov withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 136*e702bdb8SIlya Biryukov assert(!Tokens.empty()); 137*e702bdb8SIlya Biryukov assert(Tokens.back().kind() != tok::eof); 138*e702bdb8SIlya Biryukov // (!) we never consume 'eof', so looking at the next token is ok. 13958fa50f4SIlya Biryukov if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 14058fa50f4SIlya Biryukov return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 14158fa50f4SIlya Biryukov return Tokens; 1429b3f38f9SIlya Biryukov } 1439b3f38f9SIlya Biryukov 1449b3f38f9SIlya Biryukov /// Finds a token starting at \p L. The token must exist. 1459b3f38f9SIlya Biryukov const syntax::Token *findToken(SourceLocation L) const; 1469b3f38f9SIlya Biryukov 1479b3f38f9SIlya Biryukov /// A collection of trees covering the input tokens. 1489b3f38f9SIlya Biryukov /// When created, each tree corresponds to a single token in the file. 1499b3f38f9SIlya Biryukov /// Clients call 'foldChildren' to attach one or more subtrees to a parent 1509b3f38f9SIlya Biryukov /// node and update the list of trees accordingly. 1519b3f38f9SIlya Biryukov /// 1529b3f38f9SIlya Biryukov /// Ensures that added nodes properly nest and cover the whole token stream. 1539b3f38f9SIlya Biryukov struct Forest { 1549b3f38f9SIlya Biryukov Forest(syntax::Arena &A) { 155bfbf6b6cSIlya Biryukov assert(!A.tokenBuffer().expandedTokens().empty()); 156bfbf6b6cSIlya Biryukov assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 1579b3f38f9SIlya Biryukov // Create all leaf nodes. 158bfbf6b6cSIlya Biryukov // Note that we do not have 'eof' in the tree. 159bfbf6b6cSIlya Biryukov for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 1609b3f38f9SIlya Biryukov Trees.insert(Trees.end(), 1619b3f38f9SIlya Biryukov {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 1629b3f38f9SIlya Biryukov } 1639b3f38f9SIlya Biryukov 164*e702bdb8SIlya Biryukov ~Forest() { assert(DelayedFolds.empty()); } 165*e702bdb8SIlya Biryukov 1669b3f38f9SIlya Biryukov void assignRole(llvm::ArrayRef<syntax::Token> Range, 1679b3f38f9SIlya Biryukov syntax::NodeRole Role) { 1689b3f38f9SIlya Biryukov assert(!Range.empty()); 1699b3f38f9SIlya Biryukov auto It = Trees.lower_bound(Range.begin()); 1709b3f38f9SIlya Biryukov assert(It != Trees.end() && "no node found"); 1719b3f38f9SIlya Biryukov assert(It->first == Range.begin() && "no child with the specified range"); 1729b3f38f9SIlya Biryukov assert((std::next(It) == Trees.end() || 1739b3f38f9SIlya Biryukov std::next(It)->first == Range.end()) && 1749b3f38f9SIlya Biryukov "no child with the specified range"); 1759b3f38f9SIlya Biryukov It->second.Role = Role; 1769b3f38f9SIlya Biryukov } 1779b3f38f9SIlya Biryukov 178*e702bdb8SIlya Biryukov /// Add \p Node to the forest and attach child nodes based on \p Tokens. 179*e702bdb8SIlya Biryukov void foldChildren(llvm::ArrayRef<syntax::Token> Tokens, 1809b3f38f9SIlya Biryukov syntax::Tree *Node) { 181*e702bdb8SIlya Biryukov // Execute delayed folds inside `Tokens`. 182*e702bdb8SIlya Biryukov auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); 183*e702bdb8SIlya Biryukov auto It = BeginExecuted; 184*e702bdb8SIlya Biryukov for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) 185*e702bdb8SIlya Biryukov foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), 186*e702bdb8SIlya Biryukov It->second.Node); 187*e702bdb8SIlya Biryukov DelayedFolds.erase(BeginExecuted, It); 1889b3f38f9SIlya Biryukov 189*e702bdb8SIlya Biryukov // Attach children to `Node`. 190*e702bdb8SIlya Biryukov foldChildrenEager(Tokens, Node); 191*e702bdb8SIlya Biryukov } 1929b3f38f9SIlya Biryukov 193*e702bdb8SIlya Biryukov /// Schedule a call to `foldChildren` that will only be executed when 194*e702bdb8SIlya Biryukov /// containing node is folded. The range of delayed nodes can be extended by 195*e702bdb8SIlya Biryukov /// calling `extendDelayedFold`. Only one delayed node for each starting 196*e702bdb8SIlya Biryukov /// token is allowed. 197*e702bdb8SIlya Biryukov void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, 198*e702bdb8SIlya Biryukov syntax::Tree *Node) { 199*e702bdb8SIlya Biryukov assert(!Tokens.empty()); 200*e702bdb8SIlya Biryukov bool Inserted = 201*e702bdb8SIlya Biryukov DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) 202*e702bdb8SIlya Biryukov .second; 203*e702bdb8SIlya Biryukov (void)Inserted; 204*e702bdb8SIlya Biryukov assert(Inserted && "Multiple delayed folds start at the same token"); 205*e702bdb8SIlya Biryukov } 2069b3f38f9SIlya Biryukov 207*e702bdb8SIlya Biryukov /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends 208*e702bdb8SIlya Biryukov /// its endpoint to `ExtendedRange.end()` and returns true. 209*e702bdb8SIlya Biryukov /// Otherwise, returns false. 210*e702bdb8SIlya Biryukov bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { 211*e702bdb8SIlya Biryukov assert(!ExtendedRange.empty()); 212*e702bdb8SIlya Biryukov auto It = DelayedFolds.find(ExtendedRange.data()); 213*e702bdb8SIlya Biryukov if (It == DelayedFolds.end()) 214*e702bdb8SIlya Biryukov return false; 215*e702bdb8SIlya Biryukov assert(It->second.End <= ExtendedRange.end()); 216*e702bdb8SIlya Biryukov It->second.End = ExtendedRange.end(); 217*e702bdb8SIlya Biryukov return true; 2189b3f38f9SIlya Biryukov } 2199b3f38f9SIlya Biryukov 2209b3f38f9SIlya Biryukov // EXPECTS: all tokens were consumed and are owned by a single root node. 2219b3f38f9SIlya Biryukov syntax::Node *finalize() && { 2229b3f38f9SIlya Biryukov assert(Trees.size() == 1); 2239b3f38f9SIlya Biryukov auto *Root = Trees.begin()->second.Node; 2249b3f38f9SIlya Biryukov Trees = {}; 2259b3f38f9SIlya Biryukov return Root; 2269b3f38f9SIlya Biryukov } 2279b3f38f9SIlya Biryukov 2289b3f38f9SIlya Biryukov std::string str(const syntax::Arena &A) const { 2299b3f38f9SIlya Biryukov std::string R; 2309b3f38f9SIlya Biryukov for (auto It = Trees.begin(); It != Trees.end(); ++It) { 2319b3f38f9SIlya Biryukov unsigned CoveredTokens = 2329b3f38f9SIlya Biryukov It != Trees.end() 2339b3f38f9SIlya Biryukov ? (std::next(It)->first - It->first) 2349b3f38f9SIlya Biryukov : A.tokenBuffer().expandedTokens().end() - It->first; 2359b3f38f9SIlya Biryukov 2369b3f38f9SIlya Biryukov R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 2379b3f38f9SIlya Biryukov It->second.Node->kind(), 2389b3f38f9SIlya Biryukov It->first->text(A.sourceManager()), CoveredTokens); 2399b3f38f9SIlya Biryukov R += It->second.Node->dump(A); 2409b3f38f9SIlya Biryukov } 2419b3f38f9SIlya Biryukov return R; 2429b3f38f9SIlya Biryukov } 2439b3f38f9SIlya Biryukov 2449b3f38f9SIlya Biryukov private: 245*e702bdb8SIlya Biryukov /// Implementation detail of `foldChildren`, does acutal folding ignoring 246*e702bdb8SIlya Biryukov /// delayed folds. 247*e702bdb8SIlya Biryukov void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens, 248*e702bdb8SIlya Biryukov syntax::Tree *Node) { 249*e702bdb8SIlya Biryukov assert(Node->firstChild() == nullptr && "node already has children"); 250*e702bdb8SIlya Biryukov 251*e702bdb8SIlya Biryukov auto *FirstToken = Tokens.begin(); 252*e702bdb8SIlya Biryukov auto BeginChildren = Trees.lower_bound(FirstToken); 253*e702bdb8SIlya Biryukov assert((BeginChildren == Trees.end() || 254*e702bdb8SIlya Biryukov BeginChildren->first == FirstToken) && 255*e702bdb8SIlya Biryukov "fold crosses boundaries of existing subtrees"); 256*e702bdb8SIlya Biryukov auto EndChildren = Trees.lower_bound(Tokens.end()); 257*e702bdb8SIlya Biryukov assert( 258*e702bdb8SIlya Biryukov (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 259*e702bdb8SIlya Biryukov "fold crosses boundaries of existing subtrees"); 260*e702bdb8SIlya Biryukov 261*e702bdb8SIlya Biryukov // (!) we need to go in reverse order, because we can only prepend. 262*e702bdb8SIlya Biryukov for (auto It = EndChildren; It != BeginChildren; --It) 263*e702bdb8SIlya Biryukov Node->prependChildLowLevel(std::prev(It)->second.Node, 264*e702bdb8SIlya Biryukov std::prev(It)->second.Role); 265*e702bdb8SIlya Biryukov 266*e702bdb8SIlya Biryukov Trees.erase(BeginChildren, EndChildren); 267*e702bdb8SIlya Biryukov Trees.insert({FirstToken, NodeAndRole(Node)}); 268*e702bdb8SIlya Biryukov } 2699b3f38f9SIlya Biryukov /// A with a role that should be assigned to it when adding to a parent. 2709b3f38f9SIlya Biryukov struct NodeAndRole { 2719b3f38f9SIlya Biryukov explicit NodeAndRole(syntax::Node *Node) 27251dad419SIlya Biryukov : Node(Node), Role(NodeRole::Unknown) {} 2739b3f38f9SIlya Biryukov 2749b3f38f9SIlya Biryukov syntax::Node *Node; 2759b3f38f9SIlya Biryukov NodeRole Role; 2769b3f38f9SIlya Biryukov }; 2779b3f38f9SIlya Biryukov 2789b3f38f9SIlya Biryukov /// Maps from the start token to a subtree starting at that token. 279302cb3bcSIlya Biryukov /// Keys in the map are pointers into the array of expanded tokens, so 280302cb3bcSIlya Biryukov /// pointer order corresponds to the order of preprocessor tokens. 2819b3f38f9SIlya Biryukov /// FIXME: storing the end tokens is redundant. 2829b3f38f9SIlya Biryukov /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 2839b3f38f9SIlya Biryukov std::map<const syntax::Token *, NodeAndRole> Trees; 284*e702bdb8SIlya Biryukov 285*e702bdb8SIlya Biryukov /// See documentation of `foldChildrenDelayed` for details. 286*e702bdb8SIlya Biryukov struct DelayedFold { 287*e702bdb8SIlya Biryukov const syntax::Token *End = nullptr; 288*e702bdb8SIlya Biryukov syntax::Tree *Node = nullptr; 289*e702bdb8SIlya Biryukov }; 290*e702bdb8SIlya Biryukov std::map<const syntax::Token *, DelayedFold> DelayedFolds; 2919b3f38f9SIlya Biryukov }; 2929b3f38f9SIlya Biryukov 2939b3f38f9SIlya Biryukov /// For debugging purposes. 2949b3f38f9SIlya Biryukov std::string str() { return Pending.str(Arena); } 2959b3f38f9SIlya Biryukov 2969b3f38f9SIlya Biryukov syntax::Arena &Arena; 2979b3f38f9SIlya Biryukov Forest Pending; 298*e702bdb8SIlya Biryukov llvm::DenseSet<Decl*> DeclsWithoutSemicolons; 2999b3f38f9SIlya Biryukov }; 3009b3f38f9SIlya Biryukov 3019b3f38f9SIlya Biryukov namespace { 3029b3f38f9SIlya Biryukov class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 3039b3f38f9SIlya Biryukov public: 3049b3f38f9SIlya Biryukov explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 3059b3f38f9SIlya Biryukov : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 3069b3f38f9SIlya Biryukov 3079b3f38f9SIlya Biryukov bool shouldTraversePostOrder() const { return true; } 3089b3f38f9SIlya Biryukov 309*e702bdb8SIlya Biryukov bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { 310*e702bdb8SIlya Biryukov // Ensure declarators are covered by SimpleDeclaration. 311*e702bdb8SIlya Biryukov Builder.noticeDeclaratorRange(Builder.getRange(D)); 312*e702bdb8SIlya Biryukov // FIXME: build nodes for the declarator too. 313*e702bdb8SIlya Biryukov return true; 314*e702bdb8SIlya Biryukov } 315*e702bdb8SIlya Biryukov bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { 316*e702bdb8SIlya Biryukov // Also a declarator. 317*e702bdb8SIlya Biryukov Builder.noticeDeclaratorRange(Builder.getRange(D)); 318*e702bdb8SIlya Biryukov // FIXME: build nodes for the declarator too. 319*e702bdb8SIlya Biryukov return true; 3209b3f38f9SIlya Biryukov } 3219b3f38f9SIlya Biryukov 3229b3f38f9SIlya Biryukov bool VisitDecl(Decl *D) { 3239b3f38f9SIlya Biryukov assert(!D->isImplicit()); 3249b3f38f9SIlya Biryukov Builder.foldNode(Builder.getRange(D), 325*e702bdb8SIlya Biryukov new (allocator()) syntax::UnknownDeclaration()); 326*e702bdb8SIlya Biryukov return true; 327*e702bdb8SIlya Biryukov } 328*e702bdb8SIlya Biryukov 329*e702bdb8SIlya Biryukov bool WalkUpFromTagDecl(TagDecl *C) { 330*e702bdb8SIlya Biryukov // Avoid building UnknownDeclaration here, syntatically 'struct X {}' and 331*e702bdb8SIlya Biryukov // similar are part of declaration specifiers and do not introduce a new 332*e702bdb8SIlya Biryukov // top-level declaration. 3339b3f38f9SIlya Biryukov return true; 3349b3f38f9SIlya Biryukov } 3359b3f38f9SIlya Biryukov 3369b3f38f9SIlya Biryukov bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 3379b3f38f9SIlya Biryukov // (!) we do not want to call VisitDecl(), the declaration for translation 3389b3f38f9SIlya Biryukov // unit is built by finalize(). 3399b3f38f9SIlya Biryukov return true; 3409b3f38f9SIlya Biryukov } 3419b3f38f9SIlya Biryukov 3429b3f38f9SIlya Biryukov bool WalkUpFromCompoundStmt(CompoundStmt *S) { 34351dad419SIlya Biryukov using NodeRole = syntax::NodeRole; 3449b3f38f9SIlya Biryukov 345def65bb4SIlya Biryukov Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 34658fa50f4SIlya Biryukov for (auto *Child : S->body()) 34758fa50f4SIlya Biryukov Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 348def65bb4SIlya Biryukov Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 3499b3f38f9SIlya Biryukov 35058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 3519b3f38f9SIlya Biryukov new (allocator()) syntax::CompoundStatement); 3529b3f38f9SIlya Biryukov return true; 3539b3f38f9SIlya Biryukov } 3549b3f38f9SIlya Biryukov 35558fa50f4SIlya Biryukov // Some statements are not yet handled by syntax trees. 35658fa50f4SIlya Biryukov bool WalkUpFromStmt(Stmt *S) { 35758fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 35858fa50f4SIlya Biryukov new (allocator()) syntax::UnknownStatement); 35958fa50f4SIlya Biryukov return true; 36058fa50f4SIlya Biryukov } 36158fa50f4SIlya Biryukov 36258fa50f4SIlya Biryukov bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 36358fa50f4SIlya Biryukov // We override to traverse range initializer as VarDecl. 36458fa50f4SIlya Biryukov // RAV traverses it as a statement, we produce invalid node kinds in that 36558fa50f4SIlya Biryukov // case. 36658fa50f4SIlya Biryukov // FIXME: should do this in RAV instead? 36758fa50f4SIlya Biryukov if (S->getInit() && !TraverseStmt(S->getInit())) 36858fa50f4SIlya Biryukov return false; 36958fa50f4SIlya Biryukov if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 37058fa50f4SIlya Biryukov return false; 37158fa50f4SIlya Biryukov if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 37258fa50f4SIlya Biryukov return false; 37358fa50f4SIlya Biryukov if (S->getBody() && !TraverseStmt(S->getBody())) 37458fa50f4SIlya Biryukov return false; 37558fa50f4SIlya Biryukov return true; 37658fa50f4SIlya Biryukov } 37758fa50f4SIlya Biryukov 37858fa50f4SIlya Biryukov bool TraverseStmt(Stmt *S) { 379*e702bdb8SIlya Biryukov if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 380*e702bdb8SIlya Biryukov // We want to consume the semicolon, make sure SimpleDeclaration does not. 381*e702bdb8SIlya Biryukov for (auto *D : DS->decls()) 382*e702bdb8SIlya Biryukov Builder.noticeDeclaratorWithoutSemicolon(D); 383*e702bdb8SIlya Biryukov } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 38458fa50f4SIlya Biryukov // (!) do not recurse into subexpressions. 38558fa50f4SIlya Biryukov // we do not have syntax trees for expressions yet, so we only want to see 38658fa50f4SIlya Biryukov // the first top-level expression. 38758fa50f4SIlya Biryukov return WalkUpFromExpr(E->IgnoreImplicit()); 38858fa50f4SIlya Biryukov } 38958fa50f4SIlya Biryukov return RecursiveASTVisitor::TraverseStmt(S); 39058fa50f4SIlya Biryukov } 39158fa50f4SIlya Biryukov 39258fa50f4SIlya Biryukov // Some expressions are not yet handled by syntax trees. 39358fa50f4SIlya Biryukov bool WalkUpFromExpr(Expr *E) { 39458fa50f4SIlya Biryukov assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 39558fa50f4SIlya Biryukov Builder.foldNode(Builder.getExprRange(E), 39658fa50f4SIlya Biryukov new (allocator()) syntax::UnknownExpression); 39758fa50f4SIlya Biryukov return true; 39858fa50f4SIlya Biryukov } 39958fa50f4SIlya Biryukov 40058fa50f4SIlya Biryukov // The code below is very regular, it could even be generated with some 40158fa50f4SIlya Biryukov // preprocessor magic. We merely assign roles to the corresponding children 40258fa50f4SIlya Biryukov // and fold resulting nodes. 40358fa50f4SIlya Biryukov bool WalkUpFromDeclStmt(DeclStmt *S) { 40458fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 40558fa50f4SIlya Biryukov new (allocator()) syntax::DeclarationStatement); 40658fa50f4SIlya Biryukov return true; 40758fa50f4SIlya Biryukov } 40858fa50f4SIlya Biryukov 40958fa50f4SIlya Biryukov bool WalkUpFromNullStmt(NullStmt *S) { 41058fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 41158fa50f4SIlya Biryukov new (allocator()) syntax::EmptyStatement); 41258fa50f4SIlya Biryukov return true; 41358fa50f4SIlya Biryukov } 41458fa50f4SIlya Biryukov 41558fa50f4SIlya Biryukov bool WalkUpFromSwitchStmt(SwitchStmt *S) { 416def65bb4SIlya Biryukov Builder.markChildToken(S->getSwitchLoc(), 41758fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 41858fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 41958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 42058fa50f4SIlya Biryukov new (allocator()) syntax::SwitchStatement); 42158fa50f4SIlya Biryukov return true; 42258fa50f4SIlya Biryukov } 42358fa50f4SIlya Biryukov 42458fa50f4SIlya Biryukov bool WalkUpFromCaseStmt(CaseStmt *S) { 425def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 42658fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 42758fa50f4SIlya Biryukov Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 42858fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 42958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 43058fa50f4SIlya Biryukov new (allocator()) syntax::CaseStatement); 43158fa50f4SIlya Biryukov return true; 43258fa50f4SIlya Biryukov } 43358fa50f4SIlya Biryukov 43458fa50f4SIlya Biryukov bool WalkUpFromDefaultStmt(DefaultStmt *S) { 435def65bb4SIlya Biryukov Builder.markChildToken(S->getKeywordLoc(), 43658fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 43758fa50f4SIlya Biryukov Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 43858fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 43958fa50f4SIlya Biryukov new (allocator()) syntax::DefaultStatement); 44058fa50f4SIlya Biryukov return true; 44158fa50f4SIlya Biryukov } 44258fa50f4SIlya Biryukov 44358fa50f4SIlya Biryukov bool WalkUpFromIfStmt(IfStmt *S) { 444def65bb4SIlya Biryukov Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 44558fa50f4SIlya Biryukov Builder.markStmtChild(S->getThen(), 44658fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_thenStatement); 447def65bb4SIlya Biryukov Builder.markChildToken(S->getElseLoc(), 44858fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseKeyword); 44958fa50f4SIlya Biryukov Builder.markStmtChild(S->getElse(), 45058fa50f4SIlya Biryukov syntax::NodeRole::IfStatement_elseStatement); 45158fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 45258fa50f4SIlya Biryukov new (allocator()) syntax::IfStatement); 45358fa50f4SIlya Biryukov return true; 45458fa50f4SIlya Biryukov } 45558fa50f4SIlya Biryukov 45658fa50f4SIlya Biryukov bool WalkUpFromForStmt(ForStmt *S) { 457def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 45858fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 45958fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 46058fa50f4SIlya Biryukov new (allocator()) syntax::ForStatement); 46158fa50f4SIlya Biryukov return true; 46258fa50f4SIlya Biryukov } 46358fa50f4SIlya Biryukov 46458fa50f4SIlya Biryukov bool WalkUpFromWhileStmt(WhileStmt *S) { 465def65bb4SIlya Biryukov Builder.markChildToken(S->getWhileLoc(), 46658fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 46758fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 46858fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 46958fa50f4SIlya Biryukov new (allocator()) syntax::WhileStatement); 47058fa50f4SIlya Biryukov return true; 47158fa50f4SIlya Biryukov } 47258fa50f4SIlya Biryukov 47358fa50f4SIlya Biryukov bool WalkUpFromContinueStmt(ContinueStmt *S) { 474def65bb4SIlya Biryukov Builder.markChildToken(S->getContinueLoc(), 47558fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 47658fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 47758fa50f4SIlya Biryukov new (allocator()) syntax::ContinueStatement); 47858fa50f4SIlya Biryukov return true; 47958fa50f4SIlya Biryukov } 48058fa50f4SIlya Biryukov 48158fa50f4SIlya Biryukov bool WalkUpFromBreakStmt(BreakStmt *S) { 482def65bb4SIlya Biryukov Builder.markChildToken(S->getBreakLoc(), 48358fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 48458fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 48558fa50f4SIlya Biryukov new (allocator()) syntax::BreakStatement); 48658fa50f4SIlya Biryukov return true; 48758fa50f4SIlya Biryukov } 48858fa50f4SIlya Biryukov 48958fa50f4SIlya Biryukov bool WalkUpFromReturnStmt(ReturnStmt *S) { 490def65bb4SIlya Biryukov Builder.markChildToken(S->getReturnLoc(), 49158fa50f4SIlya Biryukov syntax::NodeRole::IntroducerKeyword); 49258fa50f4SIlya Biryukov Builder.markExprChild(S->getRetValue(), 49358fa50f4SIlya Biryukov syntax::NodeRole::ReturnStatement_value); 49458fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 49558fa50f4SIlya Biryukov new (allocator()) syntax::ReturnStatement); 49658fa50f4SIlya Biryukov return true; 49758fa50f4SIlya Biryukov } 49858fa50f4SIlya Biryukov 49958fa50f4SIlya Biryukov bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 500def65bb4SIlya Biryukov Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 50158fa50f4SIlya Biryukov Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 50258fa50f4SIlya Biryukov Builder.foldNode(Builder.getStmtRange(S), 50358fa50f4SIlya Biryukov new (allocator()) syntax::RangeBasedForStatement); 50458fa50f4SIlya Biryukov return true; 50558fa50f4SIlya Biryukov } 50658fa50f4SIlya Biryukov 5079b3f38f9SIlya Biryukov private: 5089b3f38f9SIlya Biryukov /// A small helper to save some typing. 5099b3f38f9SIlya Biryukov llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 5109b3f38f9SIlya Biryukov 5119b3f38f9SIlya Biryukov syntax::TreeBuilder &Builder; 5129b3f38f9SIlya Biryukov const LangOptions &LangOpts; 5139b3f38f9SIlya Biryukov }; 5149b3f38f9SIlya Biryukov } // namespace 5159b3f38f9SIlya Biryukov 5169b3f38f9SIlya Biryukov void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 5179b3f38f9SIlya Biryukov syntax::Tree *New) { 5189b3f38f9SIlya Biryukov Pending.foldChildren(Range, New); 5199b3f38f9SIlya Biryukov } 5209b3f38f9SIlya Biryukov 521*e702bdb8SIlya Biryukov void syntax::TreeBuilder::noticeDeclaratorRange( 522*e702bdb8SIlya Biryukov llvm::ArrayRef<syntax::Token> Range) { 523*e702bdb8SIlya Biryukov if (Pending.extendDelayedFold(Range)) 524*e702bdb8SIlya Biryukov return; 525*e702bdb8SIlya Biryukov Pending.foldChildrenDelayed(Range, 526*e702bdb8SIlya Biryukov new (allocator()) syntax::SimpleDeclaration); 527*e702bdb8SIlya Biryukov } 528*e702bdb8SIlya Biryukov 529*e702bdb8SIlya Biryukov void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { 530*e702bdb8SIlya Biryukov DeclsWithoutSemicolons.insert(D); 531*e702bdb8SIlya Biryukov } 532*e702bdb8SIlya Biryukov 533def65bb4SIlya Biryukov void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 5349b3f38f9SIlya Biryukov if (Loc.isInvalid()) 5359b3f38f9SIlya Biryukov return; 5369b3f38f9SIlya Biryukov Pending.assignRole(*findToken(Loc), Role); 5379b3f38f9SIlya Biryukov } 5389b3f38f9SIlya Biryukov 53958fa50f4SIlya Biryukov void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 54058fa50f4SIlya Biryukov if (!Child) 54158fa50f4SIlya Biryukov return; 54258fa50f4SIlya Biryukov 54358fa50f4SIlya Biryukov auto Range = getStmtRange(Child); 54458fa50f4SIlya Biryukov // This is an expression in a statement position, consume the trailing 54558fa50f4SIlya Biryukov // semicolon and form an 'ExpressionStatement' node. 54658fa50f4SIlya Biryukov if (auto *E = dyn_cast<Expr>(Child)) { 54758fa50f4SIlya Biryukov Pending.assignRole(getExprRange(E), 54858fa50f4SIlya Biryukov NodeRole::ExpressionStatement_expression); 54958fa50f4SIlya Biryukov // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 55058fa50f4SIlya Biryukov Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); 55158fa50f4SIlya Biryukov } 55258fa50f4SIlya Biryukov Pending.assignRole(Range, Role); 55358fa50f4SIlya Biryukov } 55458fa50f4SIlya Biryukov 55558fa50f4SIlya Biryukov void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 55658fa50f4SIlya Biryukov Pending.assignRole(getExprRange(Child), Role); 55758fa50f4SIlya Biryukov } 55858fa50f4SIlya Biryukov 5599b3f38f9SIlya Biryukov const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 5609b3f38f9SIlya Biryukov auto Tokens = Arena.tokenBuffer().expandedTokens(); 5619b3f38f9SIlya Biryukov auto &SM = Arena.sourceManager(); 5629b3f38f9SIlya Biryukov auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 5639b3f38f9SIlya Biryukov return SM.isBeforeInTranslationUnit(T.location(), L); 5649b3f38f9SIlya Biryukov }); 5659b3f38f9SIlya Biryukov assert(It != Tokens.end()); 5669b3f38f9SIlya Biryukov assert(It->location() == L); 5679b3f38f9SIlya Biryukov return &*It; 5689b3f38f9SIlya Biryukov } 5699b3f38f9SIlya Biryukov 5709b3f38f9SIlya Biryukov syntax::TranslationUnit * 5719b3f38f9SIlya Biryukov syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 5729b3f38f9SIlya Biryukov TreeBuilder Builder(A); 5739b3f38f9SIlya Biryukov BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 5749b3f38f9SIlya Biryukov return std::move(Builder).finalize(); 5759b3f38f9SIlya Biryukov } 576