1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/RecursiveASTVisitor.h" 15 #include "clang/AST/Stmt.h" 16 #include "clang/AST/TypeLoc.h" 17 #include "clang/AST/TypeLocVisitor.h" 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Tooling/Syntax/Nodes.h" 24 #include "clang/Tooling/Syntax/Tokens.h" 25 #include "clang/Tooling/Syntax/Tree.h" 26 #include "llvm/ADT/ArrayRef.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/ScopeExit.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/Support/Allocator.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/Compiler.h" 33 #include "llvm/Support/FormatVariadic.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <map> 37 38 using namespace clang; 39 40 LLVM_ATTRIBUTE_UNUSED 41 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 42 43 static SourceLocation getQualifiedNameStart(DeclaratorDecl *D) { 44 auto DN = D->getDeclName(); 45 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 46 if (IsAnonymous) 47 return SourceLocation(); 48 return D->getQualifierLoc() ? D->getQualifierLoc().getBeginLoc() 49 : D->getLocation(); 50 } 51 52 namespace { 53 /// Get start location of the Declarator from the TypeLoc. 54 /// E.g.: 55 /// loc of `(` in `int (a)` 56 /// loc of `*` in `int *(a)` 57 /// loc of the first `(` in `int (*a)(int)` 58 /// loc of the `*` in `int *(a)(int)` 59 /// loc of the first `*` in `const int *const *volatile a;` 60 /// 61 /// It is non-trivial to get the start location because TypeLocs are stored 62 /// inside out. In the example above `*volatile` is the TypeLoc returned 63 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 64 /// returns. 65 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 66 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 67 auto L = Visit(T.getInnerLoc()); 68 if (L.isValid()) 69 return L; 70 return T.getLParenLoc(); 71 } 72 73 // Types spelled in the prefix part of the declarator. 74 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 75 return HandlePointer(T); 76 } 77 78 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 79 return HandlePointer(T); 80 } 81 82 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 83 return HandlePointer(T); 84 } 85 86 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 87 return HandlePointer(T); 88 } 89 90 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 91 return HandlePointer(T); 92 } 93 94 // All other cases are not important, as they are either part of declaration 95 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 96 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 97 // declarator themselves, but their underlying type can. 98 SourceLocation VisitTypeLoc(TypeLoc T) { 99 auto N = T.getNextTypeLoc(); 100 if (!N) 101 return SourceLocation(); 102 return Visit(N); 103 } 104 105 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 106 if (T.getTypePtr()->hasTrailingReturn()) 107 return SourceLocation(); // avoid recursing into the suffix of declarator. 108 return VisitTypeLoc(T); 109 } 110 111 private: 112 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 113 auto L = Visit(T.getPointeeLoc()); 114 if (L.isValid()) 115 return L; 116 return T.getLocalSourceRange().getBegin(); 117 } 118 }; 119 } // namespace 120 121 /// Gets the range of declarator as defined by the C++ grammar. E.g. 122 /// `int a;` -> range of `a`, 123 /// `int *a;` -> range of `*a`, 124 /// `int a[10];` -> range of `a[10]`, 125 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 126 /// `int *a = nullptr` -> range of `*a = nullptr`. 127 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 128 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 129 SourceLocation Name, 130 SourceRange Initializer) { 131 SourceLocation Start = GetStartLoc().Visit(T); 132 SourceLocation End = T.getSourceRange().getEnd(); 133 assert(End.isValid()); 134 if (Name.isValid()) { 135 if (Start.isInvalid()) 136 Start = Name; 137 if (SM.isBeforeInTranslationUnit(End, Name)) 138 End = Name; 139 } 140 if (Initializer.isValid()) { 141 assert(SM.isBeforeInTranslationUnit(End, Initializer.getEnd())); 142 End = Initializer.getEnd(); 143 } 144 return SourceRange(Start, End); 145 } 146 147 /// A helper class for constructing the syntax tree while traversing a clang 148 /// AST. 149 /// 150 /// At each point of the traversal we maintain a list of pending nodes. 151 /// Initially all tokens are added as pending nodes. When processing a clang AST 152 /// node, the clients need to: 153 /// - create a corresponding syntax node, 154 /// - assign roles to all pending child nodes with 'markChild' and 155 /// 'markChildToken', 156 /// - replace the child nodes with the new syntax node in the pending list 157 /// with 'foldNode'. 158 /// 159 /// Note that all children are expected to be processed when building a node. 160 /// 161 /// Call finalize() to finish building the tree and consume the root node. 162 class syntax::TreeBuilder { 163 public: 164 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 165 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 166 LocationToToken.insert({T.location().getRawEncoding(), &T}); 167 } 168 169 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 170 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 171 172 /// Populate children for \p New node, assuming it covers tokens from \p 173 /// Range. 174 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); 175 176 /// Must be called with the range of each `DeclaratorDecl`. Ensures the 177 /// corresponding declarator nodes are covered by `SimpleDeclaration`. 178 void noticeDeclRange(llvm::ArrayRef<syntax::Token> Range); 179 180 /// Notifies that we should not consume trailing semicolon when computing 181 /// token range of \p D. 182 void noticeDeclWithoutSemicolon(Decl *D); 183 184 /// Mark the \p Child node with a corresponding \p Role. All marked children 185 /// should be consumed by foldNode. 186 /// When called on expressions (clang::Expr is derived from clang::Stmt), 187 /// wraps expressions into expression statement. 188 void markStmtChild(Stmt *Child, NodeRole Role); 189 /// Should be called for expressions in non-statement position to avoid 190 /// wrapping into expression statement. 191 void markExprChild(Expr *Child, NodeRole Role); 192 193 /// Set role for a token starting at \p Loc. 194 void markChildToken(SourceLocation Loc, NodeRole R); 195 /// Set role for \p T. 196 void markChildToken(const syntax::Token *T, NodeRole R); 197 198 /// Set role for the node that spans exactly \p Range. 199 void markChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R); 200 /// Set role for the delayed node that spans exactly \p Range. 201 void markDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R); 202 203 /// Finish building the tree and consume the root node. 204 syntax::TranslationUnit *finalize() && { 205 auto Tokens = Arena.tokenBuffer().expandedTokens(); 206 assert(!Tokens.empty()); 207 assert(Tokens.back().kind() == tok::eof); 208 209 // Build the root of the tree, consuming all the children. 210 Pending.foldChildren(Arena, Tokens.drop_back(), 211 new (Arena.allocator()) syntax::TranslationUnit); 212 213 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 214 TU->assertInvariantsRecursive(); 215 return TU; 216 } 217 218 /// getRange() finds the syntax tokens corresponding to the passed source 219 /// locations. 220 /// \p First is the start position of the first token and \p Last is the start 221 /// position of the last token. 222 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 223 SourceLocation Last) const { 224 assert(First.isValid()); 225 assert(Last.isValid()); 226 assert(First == Last || 227 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 228 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 229 } 230 llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { 231 auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); 232 if (llvm::isa<NamespaceDecl>(D)) 233 return Tokens; 234 if (DeclsWithoutSemicolons.count(D)) 235 return Tokens; 236 // FIXME: do not consume trailing semicolon on function definitions. 237 // Most declarations own a semicolon in syntax trees, but not in clang AST. 238 return withTrailingSemicolon(Tokens); 239 } 240 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 241 return getRange(E->getBeginLoc(), E->getEndLoc()); 242 } 243 /// Find the adjusted range for the statement, consuming the trailing 244 /// semicolon when needed. 245 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 246 auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); 247 if (isa<CompoundStmt>(S)) 248 return Tokens; 249 250 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 251 // all statements that end with those. Consume this semicolon here. 252 if (Tokens.back().kind() == tok::semi) 253 return Tokens; 254 return withTrailingSemicolon(Tokens); 255 } 256 257 private: 258 llvm::ArrayRef<syntax::Token> 259 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 260 assert(!Tokens.empty()); 261 assert(Tokens.back().kind() != tok::eof); 262 // We never consume 'eof', so looking at the next token is ok. 263 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 264 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 265 return Tokens; 266 } 267 268 /// Finds a token starting at \p L. The token must exist. 269 const syntax::Token *findToken(SourceLocation L) const; 270 271 /// A collection of trees covering the input tokens. 272 /// When created, each tree corresponds to a single token in the file. 273 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 274 /// node and update the list of trees accordingly. 275 /// 276 /// Ensures that added nodes properly nest and cover the whole token stream. 277 struct Forest { 278 Forest(syntax::Arena &A) { 279 assert(!A.tokenBuffer().expandedTokens().empty()); 280 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 281 // Create all leaf nodes. 282 // Note that we do not have 'eof' in the tree. 283 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 284 auto *L = new (A.allocator()) syntax::Leaf(&T); 285 L->Original = true; 286 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 287 Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); 288 } 289 } 290 291 ~Forest() { assert(DelayedFolds.empty()); } 292 293 void assignRoleDelayed(llvm::ArrayRef<syntax::Token> Range, 294 syntax::NodeRole Role) { 295 auto It = DelayedFolds.find(Range.begin()); 296 assert(It != DelayedFolds.end()); 297 assert(It->second.End == Range.end()); 298 It->second.Role = Role; 299 } 300 301 void assignRole(llvm::ArrayRef<syntax::Token> Range, 302 syntax::NodeRole Role) { 303 assert(!Range.empty()); 304 auto It = Trees.lower_bound(Range.begin()); 305 assert(It != Trees.end() && "no node found"); 306 assert(It->first == Range.begin() && "no child with the specified range"); 307 assert((std::next(It) == Trees.end() || 308 std::next(It)->first == Range.end()) && 309 "no child with the specified range"); 310 It->second.Role = Role; 311 } 312 313 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 314 void foldChildren(const syntax::Arena &A, 315 llvm::ArrayRef<syntax::Token> Tokens, 316 syntax::Tree *Node) { 317 // Execute delayed folds inside `Tokens`. 318 auto BeginFolds = DelayedFolds.lower_bound(Tokens.begin()); 319 auto EndFolds = BeginFolds; 320 for (; EndFolds != DelayedFolds.end() && 321 EndFolds->second.End <= Tokens.end(); 322 ++EndFolds) 323 ; 324 // We go in reverse order to ensure we fold deeper nodes first. 325 for (auto RevIt = EndFolds; RevIt != BeginFolds; --RevIt) { 326 auto It = std::prev(RevIt); 327 foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), 328 It->second.Node); 329 } 330 DelayedFolds.erase(BeginFolds, EndFolds); 331 332 // Attach children to `Node`. 333 foldChildrenEager(A, Tokens, Node); 334 } 335 336 /// Schedule a call to `foldChildren` that will only be executed when 337 /// containing node is folded. The range of delayed nodes can be extended by 338 /// calling `extendDelayedFold`. Only one delayed node for each starting 339 /// token is allowed. 340 void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, 341 syntax::Tree *Node) { 342 assert(!Tokens.empty()); 343 bool Inserted = 344 DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) 345 .second; 346 (void)Inserted; 347 assert(Inserted && "Multiple delayed folds start at the same token"); 348 } 349 350 /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends 351 /// its endpoint to `ExtendedRange.end()` and returns true. 352 /// Otherwise, returns false. 353 bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { 354 assert(!ExtendedRange.empty()); 355 auto It = DelayedFolds.find(ExtendedRange.data()); 356 if (It == DelayedFolds.end()) 357 return false; 358 assert(It->second.End <= ExtendedRange.end()); 359 It->second.End = ExtendedRange.end(); 360 return true; 361 } 362 363 // EXPECTS: all tokens were consumed and are owned by a single root node. 364 syntax::Node *finalize() && { 365 assert(Trees.size() == 1); 366 auto *Root = Trees.begin()->second.Node; 367 Trees = {}; 368 return Root; 369 } 370 371 std::string str(const syntax::Arena &A) const { 372 std::string R; 373 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 374 unsigned CoveredTokens = 375 It != Trees.end() 376 ? (std::next(It)->first - It->first) 377 : A.tokenBuffer().expandedTokens().end() - It->first; 378 379 R += std::string(llvm::formatv( 380 "- '{0}' covers '{1}'+{2} tokens\n", It->second.Node->kind(), 381 It->first->text(A.sourceManager()), CoveredTokens)); 382 R += It->second.Node->dump(A); 383 } 384 return R; 385 } 386 387 private: 388 /// Implementation detail of `foldChildren`, does acutal folding ignoring 389 /// delayed folds. 390 void foldChildrenEager(const syntax::Arena &A, 391 llvm::ArrayRef<syntax::Token> Tokens, 392 syntax::Tree *Node) { 393 assert(Node->firstChild() == nullptr && "node already has children"); 394 395 auto *FirstToken = Tokens.begin(); 396 auto BeginChildren = Trees.lower_bound(FirstToken); 397 assert((BeginChildren == Trees.end() || 398 BeginChildren->first == FirstToken) && 399 "fold crosses boundaries of existing subtrees"); 400 auto EndChildren = Trees.lower_bound(Tokens.end()); 401 assert( 402 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 403 "fold crosses boundaries of existing subtrees"); 404 405 // We need to go in reverse order, because we can only prepend. 406 for (auto It = EndChildren; It != BeginChildren; --It) 407 Node->prependChildLowLevel(std::prev(It)->second.Node, 408 std::prev(It)->second.Role); 409 410 // Mark that this node came from the AST and is backed by the source code. 411 Node->Original = true; 412 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 413 414 Trees.erase(BeginChildren, EndChildren); 415 Trees.insert({FirstToken, NodeAndRole(Node)}); 416 } 417 /// A with a role that should be assigned to it when adding to a parent. 418 struct NodeAndRole { 419 explicit NodeAndRole(syntax::Node *Node) 420 : Node(Node), Role(NodeRole::Unknown) {} 421 422 syntax::Node *Node; 423 NodeRole Role; 424 }; 425 426 /// Maps from the start token to a subtree starting at that token. 427 /// Keys in the map are pointers into the array of expanded tokens, so 428 /// pointer order corresponds to the order of preprocessor tokens. 429 /// FIXME: storing the end tokens is redundant. 430 /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. 431 std::map<const syntax::Token *, NodeAndRole> Trees; 432 433 /// See documentation of `foldChildrenDelayed` for details. 434 struct DelayedFold { 435 const syntax::Token *End = nullptr; 436 syntax::Tree *Node = nullptr; 437 NodeRole Role = NodeRole::Unknown; 438 }; 439 std::map<const syntax::Token *, DelayedFold> DelayedFolds; 440 }; 441 442 /// For debugging purposes. 443 std::string str() { return Pending.str(Arena); } 444 445 syntax::Arena &Arena; 446 /// To quickly find tokens by their start location. 447 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 448 LocationToToken; 449 Forest Pending; 450 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 451 }; 452 453 namespace { 454 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 455 public: 456 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 457 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 458 459 bool shouldTraversePostOrder() const { return true; } 460 461 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 462 // Ensure declarators are covered by SimpleDeclaration. 463 Builder.noticeDeclRange(Builder.getRange(DD)); 464 465 // Build the declarator node. 466 SourceRange Initializer; 467 if (auto *V = llvm::dyn_cast<VarDecl>(DD)) { 468 auto *I = V->getInit(); 469 // Initializers in range-based-for are not part of the declarator 470 if (I && !V->isCXXForRangeDecl()) 471 Initializer = I->getSourceRange(); 472 } 473 auto Declarator = getDeclaratorRange( 474 Builder.sourceManager(), DD->getTypeSourceInfo()->getTypeLoc(), 475 getQualifiedNameStart(DD), Initializer); 476 if (Declarator.isValid()) { 477 auto Tokens = 478 Builder.getRange(Declarator.getBegin(), Declarator.getEnd()); 479 Builder.foldNode(Tokens, new (allocator()) syntax::SimpleDeclarator); 480 Builder.markChild(Tokens, syntax::NodeRole::SimpleDeclaration_declarator); 481 } 482 483 return true; 484 } 485 486 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { 487 // Ensure declarators are covered by SimpleDeclaration. 488 Builder.noticeDeclRange(Builder.getRange(D)); 489 490 auto R = getDeclaratorRange( 491 Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), 492 /*Name=*/D->getLocation(), /*Initializer=*/SourceRange()); 493 if (R.isValid()) { 494 auto Tokens = Builder.getRange(R.getBegin(), R.getEnd()); 495 Builder.foldNode(Tokens, new (allocator()) syntax::SimpleDeclarator); 496 Builder.markChild(Tokens, syntax::NodeRole::SimpleDeclaration_declarator); 497 } 498 return true; 499 } 500 501 bool VisitDecl(Decl *D) { 502 assert(!D->isImplicit()); 503 Builder.foldNode(Builder.getRange(D), 504 new (allocator()) syntax::UnknownDeclaration()); 505 return true; 506 } 507 508 bool WalkUpFromTagDecl(TagDecl *C) { 509 // FIXME: build the ClassSpecifier node. 510 if (C->isFreeStanding()) { 511 // Class is a declaration specifier and needs a spanning declaration node. 512 Builder.foldNode(Builder.getRange(C), 513 new (allocator()) syntax::SimpleDeclaration); 514 return true; 515 } 516 return true; 517 } 518 519 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 520 // We do not want to call VisitDecl(), the declaration for translation 521 // unit is built by finalize(). 522 return true; 523 } 524 525 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 526 using NodeRole = syntax::NodeRole; 527 528 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 529 for (auto *Child : S->body()) 530 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 531 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 532 533 Builder.foldNode(Builder.getStmtRange(S), 534 new (allocator()) syntax::CompoundStatement); 535 return true; 536 } 537 538 // Some statements are not yet handled by syntax trees. 539 bool WalkUpFromStmt(Stmt *S) { 540 Builder.foldNode(Builder.getStmtRange(S), 541 new (allocator()) syntax::UnknownStatement); 542 return true; 543 } 544 545 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 546 // We override to traverse range initializer as VarDecl. 547 // RAV traverses it as a statement, we produce invalid node kinds in that 548 // case. 549 // FIXME: should do this in RAV instead? 550 if (S->getInit() && !TraverseStmt(S->getInit())) 551 return false; 552 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 553 return false; 554 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 555 return false; 556 if (S->getBody() && !TraverseStmt(S->getBody())) 557 return false; 558 return true; 559 } 560 561 bool TraverseStmt(Stmt *S) { 562 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 563 // We want to consume the semicolon, make sure SimpleDeclaration does not. 564 for (auto *D : DS->decls()) 565 Builder.noticeDeclWithoutSemicolon(D); 566 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 567 // Do not recurse into subexpressions. 568 // We do not have syntax trees for expressions yet, so we only want to see 569 // the first top-level expression. 570 return WalkUpFromExpr(E->IgnoreImplicit()); 571 } 572 return RecursiveASTVisitor::TraverseStmt(S); 573 } 574 575 // Some expressions are not yet handled by syntax trees. 576 bool WalkUpFromExpr(Expr *E) { 577 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 578 Builder.foldNode(Builder.getExprRange(E), 579 new (allocator()) syntax::UnknownExpression); 580 return true; 581 } 582 583 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 584 auto Tokens = Builder.getRange(S); 585 if (Tokens.front().kind() == tok::coloncolon) { 586 // Handle nested namespace definitions. Those start at '::' token, e.g. 587 // namespace a^::b {} 588 // FIXME: build corresponding nodes for the name of this namespace. 589 return true; 590 } 591 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); 592 return true; 593 } 594 595 bool TraverseParenTypeLoc(ParenTypeLoc L) { 596 // We reverse order of traversal to get the proper syntax structure. 597 if (!WalkUpFromParenTypeLoc(L)) 598 return false; 599 return TraverseTypeLoc(L.getInnerLoc()); 600 } 601 602 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 603 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 604 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 605 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 606 new (allocator()) syntax::ParenDeclarator); 607 return true; 608 } 609 610 // Declarator chunks, they are produced by type locs and some clang::Decls. 611 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 612 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 613 Builder.markExprChild(L.getSizeExpr(), 614 syntax::NodeRole::ArraySubscript_sizeExpression); 615 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 616 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 617 new (allocator()) syntax::ArraySubscript); 618 return true; 619 } 620 621 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 622 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 623 for (auto *P : L.getParams()) 624 Builder.markDelayedChild( 625 Builder.getRange(P), 626 syntax::NodeRole::ParametersAndQualifiers_parameter); 627 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 628 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 629 new (allocator()) syntax::ParametersAndQualifiers); 630 return true; 631 } 632 633 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 634 if (!L.getTypePtr()->hasTrailingReturn()) 635 return WalkUpFromFunctionTypeLoc(L); 636 637 auto TrailingReturnTokens = BuildTrailingReturn(L); 638 // Finish building the node for parameters. 639 Builder.markChild(TrailingReturnTokens, 640 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 641 return WalkUpFromFunctionTypeLoc(L); 642 } 643 644 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 645 auto SR = L.getLocalSourceRange(); 646 Builder.foldNode(Builder.getRange(SR.getBegin(), SR.getEnd()), 647 new (allocator()) syntax::MemberPointer); 648 return true; 649 } 650 651 // The code below is very regular, it could even be generated with some 652 // preprocessor magic. We merely assign roles to the corresponding children 653 // and fold resulting nodes. 654 bool WalkUpFromDeclStmt(DeclStmt *S) { 655 Builder.foldNode(Builder.getStmtRange(S), 656 new (allocator()) syntax::DeclarationStatement); 657 return true; 658 } 659 660 bool WalkUpFromNullStmt(NullStmt *S) { 661 Builder.foldNode(Builder.getStmtRange(S), 662 new (allocator()) syntax::EmptyStatement); 663 return true; 664 } 665 666 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 667 Builder.markChildToken(S->getSwitchLoc(), 668 syntax::NodeRole::IntroducerKeyword); 669 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 670 Builder.foldNode(Builder.getStmtRange(S), 671 new (allocator()) syntax::SwitchStatement); 672 return true; 673 } 674 675 bool WalkUpFromCaseStmt(CaseStmt *S) { 676 Builder.markChildToken(S->getKeywordLoc(), 677 syntax::NodeRole::IntroducerKeyword); 678 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 679 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 680 Builder.foldNode(Builder.getStmtRange(S), 681 new (allocator()) syntax::CaseStatement); 682 return true; 683 } 684 685 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 686 Builder.markChildToken(S->getKeywordLoc(), 687 syntax::NodeRole::IntroducerKeyword); 688 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 689 Builder.foldNode(Builder.getStmtRange(S), 690 new (allocator()) syntax::DefaultStatement); 691 return true; 692 } 693 694 bool WalkUpFromIfStmt(IfStmt *S) { 695 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 696 Builder.markStmtChild(S->getThen(), 697 syntax::NodeRole::IfStatement_thenStatement); 698 Builder.markChildToken(S->getElseLoc(), 699 syntax::NodeRole::IfStatement_elseKeyword); 700 Builder.markStmtChild(S->getElse(), 701 syntax::NodeRole::IfStatement_elseStatement); 702 Builder.foldNode(Builder.getStmtRange(S), 703 new (allocator()) syntax::IfStatement); 704 return true; 705 } 706 707 bool WalkUpFromForStmt(ForStmt *S) { 708 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 709 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 710 Builder.foldNode(Builder.getStmtRange(S), 711 new (allocator()) syntax::ForStatement); 712 return true; 713 } 714 715 bool WalkUpFromWhileStmt(WhileStmt *S) { 716 Builder.markChildToken(S->getWhileLoc(), 717 syntax::NodeRole::IntroducerKeyword); 718 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 719 Builder.foldNode(Builder.getStmtRange(S), 720 new (allocator()) syntax::WhileStatement); 721 return true; 722 } 723 724 bool WalkUpFromContinueStmt(ContinueStmt *S) { 725 Builder.markChildToken(S->getContinueLoc(), 726 syntax::NodeRole::IntroducerKeyword); 727 Builder.foldNode(Builder.getStmtRange(S), 728 new (allocator()) syntax::ContinueStatement); 729 return true; 730 } 731 732 bool WalkUpFromBreakStmt(BreakStmt *S) { 733 Builder.markChildToken(S->getBreakLoc(), 734 syntax::NodeRole::IntroducerKeyword); 735 Builder.foldNode(Builder.getStmtRange(S), 736 new (allocator()) syntax::BreakStatement); 737 return true; 738 } 739 740 bool WalkUpFromReturnStmt(ReturnStmt *S) { 741 Builder.markChildToken(S->getReturnLoc(), 742 syntax::NodeRole::IntroducerKeyword); 743 Builder.markExprChild(S->getRetValue(), 744 syntax::NodeRole::ReturnStatement_value); 745 Builder.foldNode(Builder.getStmtRange(S), 746 new (allocator()) syntax::ReturnStatement); 747 return true; 748 } 749 750 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 751 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 752 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 753 Builder.foldNode(Builder.getStmtRange(S), 754 new (allocator()) syntax::RangeBasedForStatement); 755 return true; 756 } 757 758 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 759 Builder.foldNode(Builder.getRange(S), 760 new (allocator()) syntax::EmptyDeclaration); 761 return true; 762 } 763 764 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 765 Builder.markExprChild(S->getAssertExpr(), 766 syntax::NodeRole::StaticAssertDeclaration_condition); 767 Builder.markExprChild(S->getMessage(), 768 syntax::NodeRole::StaticAssertDeclaration_message); 769 Builder.foldNode(Builder.getRange(S), 770 new (allocator()) syntax::StaticAssertDeclaration); 771 return true; 772 } 773 774 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 775 Builder.foldNode(Builder.getRange(S), 776 new (allocator()) syntax::LinkageSpecificationDeclaration); 777 return true; 778 } 779 780 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 781 Builder.foldNode(Builder.getRange(S), 782 new (allocator()) syntax::NamespaceAliasDefinition); 783 return true; 784 } 785 786 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 787 Builder.foldNode(Builder.getRange(S), 788 new (allocator()) syntax::UsingNamespaceDirective); 789 return true; 790 } 791 792 bool WalkUpFromUsingDecl(UsingDecl *S) { 793 Builder.foldNode(Builder.getRange(S), 794 new (allocator()) syntax::UsingDeclaration); 795 return true; 796 } 797 798 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 799 Builder.foldNode(Builder.getRange(S), 800 new (allocator()) syntax::UsingDeclaration); 801 return true; 802 } 803 804 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 805 Builder.foldNode(Builder.getRange(S), 806 new (allocator()) syntax::UsingDeclaration); 807 return true; 808 } 809 810 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 811 Builder.foldNode(Builder.getRange(S), 812 new (allocator()) syntax::TypeAliasDeclaration); 813 return true; 814 } 815 816 private: 817 /// Returns the range of the built node. 818 llvm::ArrayRef<syntax::Token> BuildTrailingReturn(FunctionProtoTypeLoc L) { 819 assert(L.getTypePtr()->hasTrailingReturn()); 820 821 auto ReturnedType = L.getReturnLoc(); 822 // Build node for the declarator, if any. 823 auto ReturnDeclaratorRange = 824 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 825 /*Name=*/SourceLocation(), 826 /*Initializer=*/SourceLocation()); 827 llvm::ArrayRef<syntax::Token> ReturnDeclaratorTokens; 828 if (ReturnDeclaratorRange.isValid()) { 829 ReturnDeclaratorTokens = Builder.getRange( 830 ReturnDeclaratorRange.getBegin(), ReturnDeclaratorRange.getEnd()); 831 Builder.foldNode(ReturnDeclaratorTokens, 832 new (allocator()) syntax::SimpleDeclarator); 833 } 834 835 // Build node for trailing return type. 836 auto Return = 837 Builder.getRange(ReturnedType.getBeginLoc(), ReturnedType.getEndLoc()); 838 const auto *Arrow = Return.begin() - 1; 839 assert(Arrow->kind() == tok::arrow); 840 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 841 Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow); 842 if (!ReturnDeclaratorTokens.empty()) 843 Builder.markChild(ReturnDeclaratorTokens, 844 syntax::NodeRole::TrailingReturnType_declarator); 845 Builder.foldNode(Tokens, new (allocator()) syntax::TrailingReturnType); 846 return Tokens; 847 } 848 /// A small helper to save some typing. 849 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 850 851 syntax::TreeBuilder &Builder; 852 const LangOptions &LangOpts; 853 }; 854 } // namespace 855 856 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, 857 syntax::Tree *New) { 858 Pending.foldChildren(Arena, Range, New); 859 } 860 861 void syntax::TreeBuilder::noticeDeclRange(llvm::ArrayRef<syntax::Token> Range) { 862 if (Pending.extendDelayedFold(Range)) 863 return; 864 Pending.foldChildrenDelayed(Range, 865 new (allocator()) syntax::SimpleDeclaration); 866 } 867 868 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 869 DeclsWithoutSemicolons.insert(D); 870 } 871 872 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 873 if (Loc.isInvalid()) 874 return; 875 Pending.assignRole(*findToken(Loc), Role); 876 } 877 878 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 879 if (!T) 880 return; 881 Pending.assignRole(*T, R); 882 } 883 884 void syntax::TreeBuilder::markChild(llvm::ArrayRef<syntax::Token> Range, 885 NodeRole R) { 886 Pending.assignRole(Range, R); 887 } 888 889 void syntax::TreeBuilder::markDelayedChild(llvm::ArrayRef<syntax::Token> Range, 890 NodeRole R) { 891 Pending.assignRoleDelayed(Range, R); 892 } 893 894 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 895 if (!Child) 896 return; 897 898 auto Range = getStmtRange(Child); 899 // This is an expression in a statement position, consume the trailing 900 // semicolon and form an 'ExpressionStatement' node. 901 if (auto *E = dyn_cast<Expr>(Child)) { 902 Pending.assignRole(getExprRange(E), 903 NodeRole::ExpressionStatement_expression); 904 // 'getRange(Stmt)' ensures this already covers a trailing semicolon. 905 Pending.foldChildren(Arena, Range, 906 new (allocator()) syntax::ExpressionStatement); 907 } 908 Pending.assignRole(Range, Role); 909 } 910 911 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 912 if (!Child) 913 return; 914 915 Pending.assignRole(getExprRange(Child), Role); 916 } 917 918 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 919 auto It = LocationToToken.find(L.getRawEncoding()); 920 assert(It != LocationToToken.end()); 921 return It->second; 922 } 923 924 syntax::TranslationUnit * 925 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 926 TreeBuilder Builder(A); 927 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 928 return std::move(Builder).finalize(); 929 } 930