1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/Decl.h" 10 #include "clang/AST/DeclBase.h" 11 #include "clang/AST/RecursiveASTVisitor.h" 12 #include "clang/AST/Stmt.h" 13 #include "clang/Basic/LLVM.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/TokenKinds.h" 17 #include "clang/Lex/Lexer.h" 18 #include "clang/Tooling/Syntax/Nodes.h" 19 #include "clang/Tooling/Syntax/Tokens.h" 20 #include "clang/Tooling/Syntax/Tree.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/FormatVariadic.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <map> 30 31 using namespace clang; 32 33 LLVM_ATTRIBUTE_UNUSED 34 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 35 36 /// A helper class for constructing the syntax tree while traversing a clang 37 /// AST. 38 /// 39 /// At each point of the traversal we maintain a list of pending nodes. 40 /// Initially all tokens are added as pending nodes. When processing a clang AST 41 /// node, the clients need to: 42 /// - create a corresponding syntax node, 43 /// - assign roles to all pending child nodes with 'markChild' and 44 /// 'markChildToken', 45 /// - replace the child nodes with the new syntax node in the pending list 46 /// with 'foldNode'. 47 /// 48 /// Note that all children are expected to be processed when building a node. 49 /// 50 /// Call finalize() to finish building the tree and consume the root node. 51 class syntax::TreeBuilder { 52 public: 53 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} 54 55 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 56 57 /// Populate children for \p New node, assuming it covers tokens from \p 58 /// Range. 59 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 60 61 /// Must be called with the range of each `DeclaratorDecl`. Ensures the 62 /// corresponding declarator nodes are covered by `SimpleDeclaration`. 63 void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); 64 65 /// Notifies that we should not consume trailing semicolon when computing 66 /// token range of \p D. 67 void noticeDeclaratorWithoutSemicolon(Decl *D); 68 69 /// Mark the \p Child node with a corresponding \p Role. All marked children 70 /// should be consumed by foldNode. 71 /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), 72 /// wraps expressions into expression statement. 73 void markStmtChild(Stmt *Child, NodeRole Role); 74 /// Should be called for expressions in non-statement position to avoid 75 /// wrapping into expression statement. 76 void markExprChild(Expr *Child, NodeRole Role); 77 78 /// Set role for a token starting at \p Loc. 79 void markChildToken(SourceLocation Loc, NodeRole R); 80 81 /// Finish building the tree and consume the root node. 82 syntax::TranslationUnit *finalize() && { 83 auto Tokens = Arena.tokenBuffer().expandedTokens(); 84 assert(!Tokens.empty()); 85 assert(Tokens.back().kind() == tok::eof); 86 87 // Build the root of the tree, consuming all the children. 88 Pending.foldChildren(Tokens.drop_back(), 89 new (Arena.allocator()) syntax::TranslationUnit); 90 91 return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 92 } 93 94 /// getRange() finds the syntax tokens corresponding to the passed source 95 /// locations. 96 /// \p First is the start position of the first token and \p Last is the start 97 /// position of the last token. 98 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 99 SourceLocation Last) const { 100 assert(First.isValid()); 101 assert(Last.isValid()); 102 assert(First == Last || 103 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 104 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 105 } 106 llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 107 auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); 108 if (llvm::isa<NamespaceDecl>(D)) 109 return Tokens; 110 if (DeclsWithoutSemicolons.count(D)) 111 return Tokens; 112 // FIXME: do not consume trailing semicolon on function definitions. 113 // Most declarations own a semicolon in syntax trees, but not in clang AST. 114 return withTrailingSemicolon(Tokens); 115 } 116 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 117 return getRange(E->getBeginLoc(), E->getEndLoc()); 118 } 119 /// Find the adjusted range for the statement, consuming the trailing 120 /// semicolon when needed. 121 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 122 auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 123 if (isa<CompoundStmt>(S)) 124 return Tokens; 125 126 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 127 // all statements that end with those. Consume this semicolon here. 128 if (Tokens.back().kind() == tok::semi) 129 return Tokens; 130 return withTrailingSemicolon(Tokens); 131 } 132 133 private: 134 llvm::ArrayRef<syntax::Token> 135 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 136 assert(!Tokens.empty()); 137 assert(Tokens.back().kind() != tok::eof); 138 // (!) we never consume 'eof', so looking at the next token is ok. 139 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 140 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 141 return Tokens; 142 } 143 144 /// Finds a token starting at \p L. The token must exist. 145 const syntax::Token *findToken(SourceLocation L) const; 146 147 /// A collection of trees covering the input tokens. 148 /// When created, each tree corresponds to a single token in the file. 149 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 150 /// node and update the list of trees accordingly. 151 /// 152 /// Ensures that added nodes properly nest and cover the whole token stream. 153 struct Forest { 154 Forest(syntax::Arena &A) { 155 assert(!A.tokenBuffer().expandedTokens().empty()); 156 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 157 // Create all leaf nodes. 158 // Note that we do not have 'eof' in the tree. 159 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) 160 Trees.insert(Trees.end(), 161 {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); 162 } 163 164 ~Forest() { assert(DelayedFolds.empty()); } 165 166 void assignRole(llvm::ArrayRef<syntax::Token> Range, 167 syntax::NodeRole Role) { 168 assert(!Range.empty()); 169 auto It = Trees.lower_bound(Range.begin()); 170 assert(It != Trees.end() && "no node found"); 171 assert(It->first == Range.begin() && "no child with the specified range"); 172 assert((std::next(It) == Trees.end() || 173 std::next(It)->first == Range.end()) && 174 "no child with the specified range"); 175 It->second.Role = Role; 176 } 177 178 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 179 void foldChildren(llvm::ArrayRef<syntax::Token> Tokens, 180 syntax::Tree *Node) { 181 // Execute delayed folds inside `Tokens`. 182 auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); 183 auto It = BeginExecuted; 184 for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) 185 foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), 186 It->second.Node); 187 DelayedFolds.erase(BeginExecuted, It); 188 189 // Attach children to `Node`. 190 foldChildrenEager(Tokens, Node); 191 } 192 193 /// Schedule a call to `foldChildren` that will only be executed when 194 /// containing node is folded. The range of delayed nodes can be extended by 195 /// calling `extendDelayedFold`. Only one delayed node for each starting 196 /// token is allowed. 197 void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, 198 syntax::Tree *Node) { 199 assert(!Tokens.empty()); 200 bool Inserted = 201 DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) 202 .second; 203 (void)Inserted; 204 assert(Inserted && "Multiple delayed folds start at the same token"); 205 } 206 207 /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends 208 /// its endpoint to `ExtendedRange.end()` and returns true. 209 /// Otherwise, returns false. 210 bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { 211 assert(!ExtendedRange.empty()); 212 auto It = DelayedFolds.find(ExtendedRange.data()); 213 if (It == DelayedFolds.end()) 214 return false; 215 assert(It->second.End <= ExtendedRange.end()); 216 It->second.End = ExtendedRange.end(); 217 return true; 218 } 219 220 // EXPECTS: all tokens were consumed and are owned by a single root node. 221 syntax::Node *finalize() && { 222 assert(Trees.size() == 1); 223 auto *Root = Trees.begin()->second.Node; 224 Trees = {}; 225 return Root; 226 } 227 228 std::string str(const syntax::Arena &A) const { 229 std::string R; 230 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 231 unsigned CoveredTokens = 232 It != Trees.end() 233 ? (std::next(It)->first - It->first) 234 : A.tokenBuffer().expandedTokens().end() - It->first; 235 236 R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", 237 It->second.Node->kind(), 238 It->first->text(A.sourceManager()), CoveredTokens); 239 R += It->second.Node->dump(A); 240 } 241 return R; 242 } 243 244 private: 245 /// Implementation detail of `foldChildren`, does acutal folding ignoring 246 /// delayed folds. 247 void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens, 248 syntax::Tree *Node) { 249 assert(Node->firstChild() == nullptr && "node already has children"); 250 251 auto *FirstToken = Tokens.begin(); 252 auto BeginChildren = Trees.lower_bound(FirstToken); 253 assert((BeginChildren == Trees.end() || 254 BeginChildren->first == FirstToken) && 255 "fold crosses boundaries of existing subtrees"); 256 auto EndChildren = Trees.lower_bound(Tokens.end()); 257 assert( 258 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 259 "fold crosses boundaries of existing subtrees"); 260 261 // (!) we need to go in reverse order, because we can only prepend. 262 for (auto It = EndChildren; It != BeginChildren; --It) 263 Node->prependChildLowLevel(std::prev(It)->second.Node, 264 std::prev(It)->second.Role); 265 266 Trees.erase(BeginChildren, EndChildren); 267 Trees.insert({FirstToken, NodeAndRole(Node)}); 268 } 269 /// A with a role that should be assigned to it when adding to a parent. 270 struct NodeAndRole { 271 explicit NodeAndRole(syntax::Node *Node) 272 : Node(Node), Role(NodeRole::Unknown) {} 273 274 syntax::Node *Node; 275 NodeRole Role; 276 }; 277 278 /// Maps from the start token to a subtree starting at that token. 279 /// Keys in the map are pointers into the array of expanded tokens, so 280 /// pointer order corresponds to the order of preprocessor tokens. 281 /// FIXME: storing the end tokens is redundant. 282 /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 283 std::map<const syntax::Token *, NodeAndRole> Trees; 284 285 /// See documentation of `foldChildrenDelayed` for details. 286 struct DelayedFold { 287 const syntax::Token *End = nullptr; 288 syntax::Tree *Node = nullptr; 289 }; 290 std::map<const syntax::Token *, DelayedFold> DelayedFolds; 291 }; 292 293 /// For debugging purposes. 294 std::string str() { return Pending.str(Arena); } 295 296 syntax::Arena &Arena; 297 Forest Pending; 298 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 299 }; 300 301 namespace { 302 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 303 public: 304 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 305 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 306 307 bool shouldTraversePostOrder() const { return true; } 308 309 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { 310 // Ensure declarators are covered by SimpleDeclaration. 311 Builder.noticeDeclaratorRange(Builder.getRange(D)); 312 // FIXME: build nodes for the declarator too. 313 return true; 314 } 315 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { 316 // Also a declarator. 317 Builder.noticeDeclaratorRange(Builder.getRange(D)); 318 // FIXME: build nodes for the declarator too. 319 return true; 320 } 321 322 bool VisitDecl(Decl *D) { 323 assert(!D->isImplicit()); 324 Builder.foldNode(Builder.getRange(D), 325 new (allocator()) syntax::UnknownDeclaration()); 326 return true; 327 } 328 329 bool WalkUpFromTagDecl(TagDecl *C) { 330 // Avoid building UnknownDeclaration here, syntatically 'struct X {}' and 331 // similar are part of declaration specifiers and do not introduce a new 332 // top-level declaration. 333 return true; 334 } 335 336 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 337 // (!) we do not want to call VisitDecl(), the declaration for translation 338 // unit is built by finalize(). 339 return true; 340 } 341 342 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 343 using NodeRole = syntax::NodeRole; 344 345 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 346 for (auto *Child : S->body()) 347 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 348 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 349 350 Builder.foldNode(Builder.getStmtRange(S), 351 new (allocator()) syntax::CompoundStatement); 352 return true; 353 } 354 355 // Some statements are not yet handled by syntax trees. 356 bool WalkUpFromStmt(Stmt *S) { 357 Builder.foldNode(Builder.getStmtRange(S), 358 new (allocator()) syntax::UnknownStatement); 359 return true; 360 } 361 362 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 363 // We override to traverse range initializer as VarDecl. 364 // RAV traverses it as a statement, we produce invalid node kinds in that 365 // case. 366 // FIXME: should do this in RAV instead? 367 if (S->getInit() && !TraverseStmt(S->getInit())) 368 return false; 369 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 370 return false; 371 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 372 return false; 373 if (S->getBody() && !TraverseStmt(S->getBody())) 374 return false; 375 return true; 376 } 377 378 bool TraverseStmt(Stmt *S) { 379 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 380 // We want to consume the semicolon, make sure SimpleDeclaration does not. 381 for (auto *D : DS->decls()) 382 Builder.noticeDeclaratorWithoutSemicolon(D); 383 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 384 // (!) do not recurse into subexpressions. 385 // we do not have syntax trees for expressions yet, so we only want to see 386 // the first top-level expression. 387 return WalkUpFromExpr(E->IgnoreImplicit()); 388 } 389 return RecursiveASTVisitor::TraverseStmt(S); 390 } 391 392 // Some expressions are not yet handled by syntax trees. 393 bool WalkUpFromExpr(Expr *E) { 394 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 395 Builder.foldNode(Builder.getExprRange(E), 396 new (allocator()) syntax::UnknownExpression); 397 return true; 398 } 399 400 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 401 auto Tokens = Builder.getRange(S); 402 if (Tokens.front().kind() == tok::coloncolon) { 403 // Handle nested namespace definitions. Those start at '::' token, e.g. 404 // namespace a^::b {} 405 // FIXME: build corresponding nodes for the name of this namespace. 406 return true; 407 } 408 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); 409 return true; 410 } 411 412 // The code below is very regular, it could even be generated with some 413 // preprocessor magic. We merely assign roles to the corresponding children 414 // and fold resulting nodes. 415 bool WalkUpFromDeclStmt(DeclStmt *S) { 416 Builder.foldNode(Builder.getStmtRange(S), 417 new (allocator()) syntax::DeclarationStatement); 418 return true; 419 } 420 421 bool WalkUpFromNullStmt(NullStmt *S) { 422 Builder.foldNode(Builder.getStmtRange(S), 423 new (allocator()) syntax::EmptyStatement); 424 return true; 425 } 426 427 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 428 Builder.markChildToken(S->getSwitchLoc(), 429 syntax::NodeRole::IntroducerKeyword); 430 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 431 Builder.foldNode(Builder.getStmtRange(S), 432 new (allocator()) syntax::SwitchStatement); 433 return true; 434 } 435 436 bool WalkUpFromCaseStmt(CaseStmt *S) { 437 Builder.markChildToken(S->getKeywordLoc(), 438 syntax::NodeRole::IntroducerKeyword); 439 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 440 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 441 Builder.foldNode(Builder.getStmtRange(S), 442 new (allocator()) syntax::CaseStatement); 443 return true; 444 } 445 446 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 447 Builder.markChildToken(S->getKeywordLoc(), 448 syntax::NodeRole::IntroducerKeyword); 449 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 450 Builder.foldNode(Builder.getStmtRange(S), 451 new (allocator()) syntax::DefaultStatement); 452 return true; 453 } 454 455 bool WalkUpFromIfStmt(IfStmt *S) { 456 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 457 Builder.markStmtChild(S->getThen(), 458 syntax::NodeRole::IfStatement_thenStatement); 459 Builder.markChildToken(S->getElseLoc(), 460 syntax::NodeRole::IfStatement_elseKeyword); 461 Builder.markStmtChild(S->getElse(), 462 syntax::NodeRole::IfStatement_elseStatement); 463 Builder.foldNode(Builder.getStmtRange(S), 464 new (allocator()) syntax::IfStatement); 465 return true; 466 } 467 468 bool WalkUpFromForStmt(ForStmt *S) { 469 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 470 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 471 Builder.foldNode(Builder.getStmtRange(S), 472 new (allocator()) syntax::ForStatement); 473 return true; 474 } 475 476 bool WalkUpFromWhileStmt(WhileStmt *S) { 477 Builder.markChildToken(S->getWhileLoc(), 478 syntax::NodeRole::IntroducerKeyword); 479 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 480 Builder.foldNode(Builder.getStmtRange(S), 481 new (allocator()) syntax::WhileStatement); 482 return true; 483 } 484 485 bool WalkUpFromContinueStmt(ContinueStmt *S) { 486 Builder.markChildToken(S->getContinueLoc(), 487 syntax::NodeRole::IntroducerKeyword); 488 Builder.foldNode(Builder.getStmtRange(S), 489 new (allocator()) syntax::ContinueStatement); 490 return true; 491 } 492 493 bool WalkUpFromBreakStmt(BreakStmt *S) { 494 Builder.markChildToken(S->getBreakLoc(), 495 syntax::NodeRole::IntroducerKeyword); 496 Builder.foldNode(Builder.getStmtRange(S), 497 new (allocator()) syntax::BreakStatement); 498 return true; 499 } 500 501 bool WalkUpFromReturnStmt(ReturnStmt *S) { 502 Builder.markChildToken(S->getReturnLoc(), 503 syntax::NodeRole::IntroducerKeyword); 504 Builder.markExprChild(S->getRetValue(), 505 syntax::NodeRole::ReturnStatement_value); 506 Builder.foldNode(Builder.getStmtRange(S), 507 new (allocator()) syntax::ReturnStatement); 508 return true; 509 } 510 511 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 512 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 513 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 514 Builder.foldNode(Builder.getStmtRange(S), 515 new (allocator()) syntax::RangeBasedForStatement); 516 return true; 517 } 518 519 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 520 Builder.foldNode(Builder.getRange(S), 521 new (allocator()) syntax::EmptyDeclaration); 522 return true; 523 } 524 525 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 526 Builder.markExprChild(S->getAssertExpr(), 527 syntax::NodeRole::StaticAssertDeclaration_condition); 528 Builder.markExprChild(S->getMessage(), 529 syntax::NodeRole::StaticAssertDeclaration_message); 530 Builder.foldNode(Builder.getRange(S), 531 new (allocator()) syntax::StaticAssertDeclaration); 532 return true; 533 } 534 535 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 536 Builder.foldNode(Builder.getRange(S), 537 new (allocator()) syntax::LinkageSpecificationDeclaration); 538 return true; 539 } 540 541 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 542 Builder.foldNode(Builder.getRange(S), 543 new (allocator()) syntax::NamespaceAliasDefinition); 544 return true; 545 } 546 547 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 548 Builder.foldNode(Builder.getRange(S), 549 new (allocator()) syntax::UsingNamespaceDirective); 550 return true; 551 } 552 553 bool WalkUpFromUsingDecl(UsingDecl *S) { 554 Builder.foldNode(Builder.getRange(S), 555 new (allocator()) syntax::UsingDeclaration); 556 return true; 557 } 558 559 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 560 Builder.foldNode(Builder.getRange(S), 561 new (allocator()) syntax::UsingDeclaration); 562 return true; 563 } 564 565 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 566 Builder.foldNode(Builder.getRange(S), 567 new (allocator()) syntax::UsingDeclaration); 568 return true; 569 } 570 571 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 572 Builder.foldNode(Builder.getRange(S), 573 new (allocator()) syntax::TypeAliasDeclaration); 574 return true; 575 } 576 577 private: 578 /// A small helper to save some typing. 579 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 580 581 syntax::TreeBuilder &Builder; 582 const LangOptions &LangOpts; 583 }; 584 } // namespace 585 586 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 587 syntax::Tree *New) { 588 Pending.foldChildren(Range, New); 589 } 590 591 void syntax::TreeBuilder::noticeDeclaratorRange( 592 llvm::ArrayRef<syntax::Token> Range) { 593 if (Pending.extendDelayedFold(Range)) 594 return; 595 Pending.foldChildrenDelayed(Range, 596 new (allocator()) syntax::SimpleDeclaration); 597 } 598 599 void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { 600 DeclsWithoutSemicolons.insert(D); 601 } 602 603 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 604 if (Loc.isInvalid()) 605 return; 606 Pending.assignRole(*findToken(Loc), Role); 607 } 608 609 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 610 if (!Child) 611 return; 612 613 auto Range = getStmtRange(Child); 614 // This is an expression in a statement position, consume the trailing 615 // semicolon and form an 'ExpressionStatement' node. 616 if (auto *E = dyn_cast<Expr>(Child)) { 617 Pending.assignRole(getExprRange(E), 618 NodeRole::ExpressionStatement_expression); 619 // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 620 Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); 621 } 622 Pending.assignRole(Range, Role); 623 } 624 625 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 626 if (!Child) 627 return; 628 629 Pending.assignRole(getExprRange(Child), Role); 630 } 631 632 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 633 auto Tokens = Arena.tokenBuffer().expandedTokens(); 634 auto &SM = Arena.sourceManager(); 635 auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { 636 return SM.isBeforeInTranslationUnit(T.location(), L); 637 }); 638 assert(It != Tokens.end()); 639 assert(It->location() == L); 640 return &*It; 641 } 642 643 syntax::TranslationUnit * 644 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 645 TreeBuilder Builder(A); 646 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 647 return std::move(Builder).finalize(); 648 } 649