1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/RecursiveASTVisitor.h" 15 #include "clang/AST/Stmt.h" 16 #include "clang/AST/TypeLoc.h" 17 #include "clang/AST/TypeLocVisitor.h" 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/Specifiers.h" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Lex/Lexer.h" 24 #include "clang/Tooling/Syntax/Nodes.h" 25 #include "clang/Tooling/Syntax/Tokens.h" 26 #include "clang/Tooling/Syntax/Tree.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/PointerUnion.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/ADT/ScopeExit.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/Support/Allocator.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cstddef> 40 #include <map> 41 42 using namespace clang; 43 44 LLVM_ATTRIBUTE_UNUSED 45 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 46 47 static SourceLocation getQualifiedNameStart(DeclaratorDecl *D) { 48 auto DN = D->getDeclName(); 49 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 50 if (IsAnonymous) 51 return SourceLocation(); 52 return D->getQualifierLoc() ? D->getQualifierLoc().getBeginLoc() 53 : D->getLocation(); 54 } 55 56 namespace { 57 /// Get start location of the Declarator from the TypeLoc. 58 /// E.g.: 59 /// loc of `(` in `int (a)` 60 /// loc of `*` in `int *(a)` 61 /// loc of the first `(` in `int (*a)(int)` 62 /// loc of the `*` in `int *(a)(int)` 63 /// loc of the first `*` in `const int *const *volatile a;` 64 /// 65 /// It is non-trivial to get the start location because TypeLocs are stored 66 /// inside out. In the example above `*volatile` is the TypeLoc returned 67 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 68 /// returns. 69 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 70 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 71 auto L = Visit(T.getInnerLoc()); 72 if (L.isValid()) 73 return L; 74 return T.getLParenLoc(); 75 } 76 77 // Types spelled in the prefix part of the declarator. 78 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 79 return HandlePointer(T); 80 } 81 82 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 83 return HandlePointer(T); 84 } 85 86 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 87 return HandlePointer(T); 88 } 89 90 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 91 return HandlePointer(T); 92 } 93 94 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 95 return HandlePointer(T); 96 } 97 98 // All other cases are not important, as they are either part of declaration 99 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 100 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 101 // declarator themselves, but their underlying type can. 102 SourceLocation VisitTypeLoc(TypeLoc T) { 103 auto N = T.getNextTypeLoc(); 104 if (!N) 105 return SourceLocation(); 106 return Visit(N); 107 } 108 109 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 110 if (T.getTypePtr()->hasTrailingReturn()) 111 return SourceLocation(); // avoid recursing into the suffix of declarator. 112 return VisitTypeLoc(T); 113 } 114 115 private: 116 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 117 auto L = Visit(T.getPointeeLoc()); 118 if (L.isValid()) 119 return L; 120 return T.getLocalSourceRange().getBegin(); 121 } 122 }; 123 } // namespace 124 125 /// Gets the range of declarator as defined by the C++ grammar. E.g. 126 /// `int a;` -> range of `a`, 127 /// `int *a;` -> range of `*a`, 128 /// `int a[10];` -> range of `a[10]`, 129 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 130 /// `int *a = nullptr` -> range of `*a = nullptr`. 131 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 132 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 133 SourceLocation Name, 134 SourceRange Initializer) { 135 SourceLocation Start = GetStartLoc().Visit(T); 136 SourceLocation End = T.getSourceRange().getEnd(); 137 assert(End.isValid()); 138 if (Name.isValid()) { 139 if (Start.isInvalid()) 140 Start = Name; 141 if (SM.isBeforeInTranslationUnit(End, Name)) 142 End = Name; 143 } 144 if (Initializer.isValid()) { 145 assert(SM.isBeforeInTranslationUnit(End, Initializer.getEnd())); 146 End = Initializer.getEnd(); 147 } 148 return SourceRange(Start, End); 149 } 150 151 namespace { 152 /// All AST hierarchy roots that can be represented as pointers. 153 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 154 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 155 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 156 /// FIXME: expose this as public API. 157 class ASTToSyntaxMapping { 158 public: 159 void add(ASTPtr From, syntax::Tree *To) { 160 assert(To != nullptr); 161 assert(!From.isNull()); 162 163 bool Added = Nodes.insert({From, To}).second; 164 (void)Added; 165 assert(Added && "mapping added twice"); 166 } 167 168 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 169 170 private: 171 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 172 }; 173 } // namespace 174 175 /// A helper class for constructing the syntax tree while traversing a clang 176 /// AST. 177 /// 178 /// At each point of the traversal we maintain a list of pending nodes. 179 /// Initially all tokens are added as pending nodes. When processing a clang AST 180 /// node, the clients need to: 181 /// - create a corresponding syntax node, 182 /// - assign roles to all pending child nodes with 'markChild' and 183 /// 'markChildToken', 184 /// - replace the child nodes with the new syntax node in the pending list 185 /// with 'foldNode'. 186 /// 187 /// Note that all children are expected to be processed when building a node. 188 /// 189 /// Call finalize() to finish building the tree and consume the root node. 190 class syntax::TreeBuilder { 191 public: 192 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 193 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 194 LocationToToken.insert({T.location().getRawEncoding(), &T}); 195 } 196 197 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 198 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 199 200 /// Populate children for \p New node, assuming it covers tokens from \p 201 /// Range. 202 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 203 ASTPtr From) { 204 assert(New); 205 Pending.foldChildren(Arena, Range, New); 206 if (From) 207 Mapping.add(From, New); 208 } 209 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 210 TypeLoc L) { 211 // FIXME: add mapping for TypeLocs 212 foldNode(Range, New, nullptr); 213 } 214 215 /// Must be called with the range of each `DeclaratorDecl`. Ensures the 216 /// corresponding declarator nodes are covered by `SimpleDeclaration`. 217 void noticeDeclRange(llvm::ArrayRef<syntax::Token> Range); 218 219 /// Notifies that we should not consume trailing semicolon when computing 220 /// token range of \p D. 221 void noticeDeclWithoutSemicolon(Decl *D); 222 223 /// Mark the \p Child node with a corresponding \p Role. All marked children 224 /// should be consumed by foldNode. 225 /// When called on expressions (clang::Expr is derived from clang::Stmt), 226 /// wraps expressions into expression statement. 227 void markStmtChild(Stmt *Child, NodeRole Role); 228 /// Should be called for expressions in non-statement position to avoid 229 /// wrapping into expression statement. 230 void markExprChild(Expr *Child, NodeRole Role); 231 /// Set role for a token starting at \p Loc. 232 void markChildToken(SourceLocation Loc, NodeRole R); 233 /// Set role for \p T. 234 void markChildToken(const syntax::Token *T, NodeRole R); 235 236 /// Set role for \p N. 237 void markChild(syntax::Node *N, NodeRole R); 238 /// Set role for the syntax node matching \p N. 239 void markChild(ASTPtr N, NodeRole R); 240 /// Set role for the delayed node that spans exactly \p Range. 241 void markDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R); 242 /// Set role for the node that may or may not be delayed. Node must span 243 /// exactly \p Range. 244 void markMaybeDelayedChild(llvm::ArrayRef<syntax::Token> Range, NodeRole R); 245 246 /// Finish building the tree and consume the root node. 247 syntax::TranslationUnit *finalize() && { 248 auto Tokens = Arena.tokenBuffer().expandedTokens(); 249 assert(!Tokens.empty()); 250 assert(Tokens.back().kind() == tok::eof); 251 252 // Build the root of the tree, consuming all the children. 253 Pending.foldChildren(Arena, Tokens.drop_back(), 254 new (Arena.allocator()) syntax::TranslationUnit); 255 256 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 257 TU->assertInvariantsRecursive(); 258 return TU; 259 } 260 261 /// Finds a token starting at \p L. The token must exist if \p L is valid. 262 const syntax::Token *findToken(SourceLocation L) const; 263 264 /// Finds the syntax tokens corresponding to the \p SourceRange. 265 llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { 266 assert(Range.isValid()); 267 return getRange(Range.getBegin(), Range.getEnd()); 268 } 269 270 /// Finds the syntax tokens corresponding to the passed source locations. 271 /// \p First is the start position of the first token and \p Last is the start 272 /// position of the last token. 273 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 274 SourceLocation Last) const { 275 assert(First.isValid()); 276 assert(Last.isValid()); 277 assert(First == Last || 278 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 279 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 280 } 281 282 llvm::ArrayRef<syntax::Token> 283 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 284 auto Tokens = getRange(D->getSourceRange()); 285 return maybeAppendSemicolon(Tokens, D); 286 } 287 288 llvm::ArrayRef<syntax::Token> getDeclRange(const Decl *D) const { 289 llvm::ArrayRef<clang::syntax::Token> Tokens; 290 // We want to drop the template parameters for specializations. 291 if (const auto *S = llvm::dyn_cast<TagDecl>(D)) 292 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 293 else 294 Tokens = getRange(D->getSourceRange()); 295 return maybeAppendSemicolon(Tokens, D); 296 } 297 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 298 return getRange(E->getSourceRange()); 299 } 300 /// Find the adjusted range for the statement, consuming the trailing 301 /// semicolon when needed. 302 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 303 auto Tokens = getRange(S->getSourceRange()); 304 if (isa<CompoundStmt>(S)) 305 return Tokens; 306 307 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 308 // all statements that end with those. Consume this semicolon here. 309 if (Tokens.back().kind() == tok::semi) 310 return Tokens; 311 return withTrailingSemicolon(Tokens); 312 } 313 314 private: 315 llvm::ArrayRef<syntax::Token> 316 maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, 317 const Decl *D) const { 318 if (llvm::isa<NamespaceDecl>(D)) 319 return Tokens; 320 if (DeclsWithoutSemicolons.count(D)) 321 return Tokens; 322 // FIXME: do not consume trailing semicolon on function definitions. 323 // Most declarations own a semicolon in syntax trees, but not in clang AST. 324 return withTrailingSemicolon(Tokens); 325 } 326 327 llvm::ArrayRef<syntax::Token> 328 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 329 assert(!Tokens.empty()); 330 assert(Tokens.back().kind() != tok::eof); 331 // We never consume 'eof', so looking at the next token is ok. 332 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 333 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 334 return Tokens; 335 } 336 337 void setRole(syntax::Node *N, NodeRole R) { 338 assert(N->role() == NodeRole::Detached); 339 N->setRole(R); 340 } 341 342 /// A collection of trees covering the input tokens. 343 /// When created, each tree corresponds to a single token in the file. 344 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 345 /// node and update the list of trees accordingly. 346 /// 347 /// Ensures that added nodes properly nest and cover the whole token stream. 348 struct Forest { 349 Forest(syntax::Arena &A) { 350 assert(!A.tokenBuffer().expandedTokens().empty()); 351 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 352 // Create all leaf nodes. 353 // Note that we do not have 'eof' in the tree. 354 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 355 auto *L = new (A.allocator()) syntax::Leaf(&T); 356 L->Original = true; 357 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 358 Trees.insert(Trees.end(), {&T, L}); 359 } 360 } 361 362 ~Forest() { assert(DelayedFolds.empty()); } 363 364 void assignRoleDelayed(llvm::ArrayRef<syntax::Token> Range, 365 syntax::NodeRole Role) { 366 auto It = DelayedFolds.find(Range.begin()); 367 assert(It != DelayedFolds.end()); 368 assert(It->second.End == Range.end()); 369 It->second.Role = Role; 370 } 371 372 void assignRoleMaybeDelayed(llvm::ArrayRef<syntax::Token> Range, 373 syntax::NodeRole Role) { 374 auto It = DelayedFolds.find(Range.begin()); 375 if (It == DelayedFolds.end()) 376 return assignRole(Range, Role); 377 assert(It->second.End == Range.end()); 378 It->second.Role = Role; 379 } 380 381 void assignRole(llvm::ArrayRef<syntax::Token> Range, 382 syntax::NodeRole Role) { 383 assert(!Range.empty()); 384 auto It = Trees.lower_bound(Range.begin()); 385 assert(It != Trees.end() && "no node found"); 386 assert(It->first == Range.begin() && "no child with the specified range"); 387 assert((std::next(It) == Trees.end() || 388 std::next(It)->first == Range.end()) && 389 "no child with the specified range"); 390 assert(It->second->role() == NodeRole::Detached && 391 "re-assigning role for a child"); 392 It->second->setRole(Role); 393 } 394 395 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 396 void foldChildren(const syntax::Arena &A, 397 llvm::ArrayRef<syntax::Token> Tokens, 398 syntax::Tree *Node) { 399 // Execute delayed folds inside `Tokens`. 400 auto BeginFolds = DelayedFolds.lower_bound(Tokens.begin()); 401 auto EndFolds = BeginFolds; 402 for (; EndFolds != DelayedFolds.end() && 403 EndFolds->second.End <= Tokens.end(); 404 ++EndFolds) 405 ; 406 // We go in reverse order to ensure we fold deeper nodes first. 407 for (auto RevIt = EndFolds; RevIt != BeginFolds; --RevIt) { 408 auto It = std::prev(RevIt); 409 foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), 410 It->second.Node); 411 } 412 DelayedFolds.erase(BeginFolds, EndFolds); 413 414 // Attach children to `Node`. 415 foldChildrenEager(A, Tokens, Node); 416 } 417 418 /// Schedule a call to `foldChildren` that will only be executed when 419 /// containing node is folded. The range of delayed nodes can be extended by 420 /// calling `extendDelayedFold`. Only one delayed node for each starting 421 /// token is allowed. 422 void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, 423 syntax::Tree *Node) { 424 assert(!Tokens.empty()); 425 bool Inserted = 426 DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) 427 .second; 428 (void)Inserted; 429 assert(Inserted && "Multiple delayed folds start at the same token"); 430 } 431 432 /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends 433 /// its endpoint to `ExtendedRange.end()` and returns true. 434 /// Otherwise, returns false. 435 bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { 436 assert(!ExtendedRange.empty()); 437 auto It = DelayedFolds.find(ExtendedRange.data()); 438 if (It == DelayedFolds.end()) 439 return false; 440 assert(It->second.End <= ExtendedRange.end()); 441 It->second.End = ExtendedRange.end(); 442 return true; 443 } 444 445 // EXPECTS: all tokens were consumed and are owned by a single root node. 446 syntax::Node *finalize() && { 447 assert(Trees.size() == 1); 448 auto *Root = Trees.begin()->second; 449 Trees = {}; 450 return Root; 451 } 452 453 std::string str(const syntax::Arena &A) const { 454 std::string R; 455 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 456 unsigned CoveredTokens = 457 It != Trees.end() 458 ? (std::next(It)->first - It->first) 459 : A.tokenBuffer().expandedTokens().end() - It->first; 460 461 R += std::string(llvm::formatv( 462 "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 463 It->first->text(A.sourceManager()), CoveredTokens)); 464 R += It->second->dump(A); 465 } 466 return R; 467 } 468 469 private: 470 /// Implementation detail of `foldChildren`, does acutal folding ignoring 471 /// delayed folds. 472 void foldChildrenEager(const syntax::Arena &A, 473 llvm::ArrayRef<syntax::Token> Tokens, 474 syntax::Tree *Node) { 475 assert(Node->firstChild() == nullptr && "node already has children"); 476 477 auto *FirstToken = Tokens.begin(); 478 auto BeginChildren = Trees.lower_bound(FirstToken); 479 assert((BeginChildren == Trees.end() || 480 BeginChildren->first == FirstToken) && 481 "fold crosses boundaries of existing subtrees"); 482 auto EndChildren = Trees.lower_bound(Tokens.end()); 483 assert( 484 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 485 "fold crosses boundaries of existing subtrees"); 486 487 // We need to go in reverse order, because we can only prepend. 488 for (auto It = EndChildren; It != BeginChildren; --It) { 489 auto *C = std::prev(It)->second; 490 if (C->role() == NodeRole::Detached) 491 C->setRole(NodeRole::Unknown); 492 Node->prependChildLowLevel(C); 493 } 494 495 // Mark that this node came from the AST and is backed by the source code. 496 Node->Original = true; 497 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 498 499 Trees.erase(BeginChildren, EndChildren); 500 Trees.insert({FirstToken, Node}); 501 } 502 503 /// Maps from the start token to a subtree starting at that token. 504 /// Keys in the map are pointers into the array of expanded tokens, so 505 /// pointer order corresponds to the order of preprocessor tokens. 506 std::map<const syntax::Token *, syntax::Node *> Trees; 507 508 /// See documentation of `foldChildrenDelayed` for details. 509 struct DelayedFold { 510 const syntax::Token *End = nullptr; 511 syntax::Tree *Node = nullptr; 512 NodeRole Role = NodeRole::Unknown; 513 }; 514 std::map<const syntax::Token *, DelayedFold> DelayedFolds; 515 }; 516 517 /// For debugging purposes. 518 std::string str() { return Pending.str(Arena); } 519 520 syntax::Arena &Arena; 521 /// To quickly find tokens by their start location. 522 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 523 LocationToToken; 524 Forest Pending; 525 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 526 ASTToSyntaxMapping Mapping; 527 }; 528 529 namespace { 530 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 531 public: 532 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 533 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 534 535 bool shouldTraversePostOrder() const { return true; } 536 537 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 538 // Ensure declarators are covered by SimpleDeclaration. 539 Builder.noticeDeclRange(Builder.getDeclRange(DD)); 540 541 // Build the declarator node. 542 SourceRange Initializer; 543 if (auto *V = llvm::dyn_cast<VarDecl>(DD)) { 544 auto *I = V->getInit(); 545 // Initializers in range-based-for are not part of the declarator 546 if (I && !V->isCXXForRangeDecl()) 547 Initializer = I->getSourceRange(); 548 } 549 auto Declarator = getDeclaratorRange( 550 Builder.sourceManager(), DD->getTypeSourceInfo()->getTypeLoc(), 551 getQualifiedNameStart(DD), Initializer); 552 if (Declarator.isValid()) { 553 auto *N = new (allocator()) syntax::SimpleDeclarator; 554 Builder.foldNode(Builder.getRange(Declarator), N, DD); 555 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 556 } 557 558 return true; 559 } 560 561 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { 562 // Ensure declarators are covered by SimpleDeclaration. 563 Builder.noticeDeclRange(Builder.getDeclRange(D)); 564 565 auto R = getDeclaratorRange( 566 Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), 567 /*Name=*/D->getLocation(), /*Initializer=*/SourceRange()); 568 if (R.isValid()) { 569 auto *N = new (allocator()) syntax::SimpleDeclarator; 570 Builder.foldNode(Builder.getRange(R), N, D); 571 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 572 } 573 return true; 574 } 575 576 bool VisitDecl(Decl *D) { 577 assert(!D->isImplicit()); 578 Builder.foldNode(Builder.getDeclRange(D), 579 new (allocator()) syntax::UnknownDeclaration(), D); 580 return true; 581 } 582 583 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 584 // override Traverse. 585 // FIXME: make RAV call WalkUpFrom* instead. 586 bool 587 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 588 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 589 return false; 590 if (C->isExplicitSpecialization()) 591 return true; // we are only interested in explicit instantiations. 592 auto *Declaration = 593 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 594 foldExplicitTemplateInstantiation( 595 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 596 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 597 return true; 598 } 599 600 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 601 foldTemplateDeclaration( 602 Builder.getDeclRange(S), 603 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 604 Builder.getDeclRange(S->getTemplatedDecl()), S); 605 return true; 606 } 607 608 bool WalkUpFromTagDecl(TagDecl *C) { 609 // FIXME: build the ClassSpecifier node. 610 if (!C->isFreeStanding()) { 611 assert(C->getNumTemplateParameterLists() == 0); 612 return true; 613 } 614 handleFreeStandingTagDecl(C); 615 return true; 616 } 617 618 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 619 assert(C->isFreeStanding()); 620 // Class is a declaration specifier and needs a spanning declaration node. 621 auto DeclarationRange = Builder.getDeclRange(C); 622 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 623 Builder.foldNode(DeclarationRange, Result, nullptr); 624 625 // Build TemplateDeclaration nodes if we had template parameters. 626 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 627 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 628 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 629 Result = 630 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 631 DeclarationRange = R; 632 }; 633 if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 634 ConsumeTemplateParameters(*S->getTemplateParameters()); 635 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 636 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 637 return Result; 638 } 639 640 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 641 // We do not want to call VisitDecl(), the declaration for translation 642 // unit is built by finalize(). 643 return true; 644 } 645 646 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 647 using NodeRole = syntax::NodeRole; 648 649 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 650 for (auto *Child : S->body()) 651 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 652 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 653 654 Builder.foldNode(Builder.getStmtRange(S), 655 new (allocator()) syntax::CompoundStatement, S); 656 return true; 657 } 658 659 // Some statements are not yet handled by syntax trees. 660 bool WalkUpFromStmt(Stmt *S) { 661 Builder.foldNode(Builder.getStmtRange(S), 662 new (allocator()) syntax::UnknownStatement, S); 663 return true; 664 } 665 666 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 667 // We override to traverse range initializer as VarDecl. 668 // RAV traverses it as a statement, we produce invalid node kinds in that 669 // case. 670 // FIXME: should do this in RAV instead? 671 if (S->getInit() && !TraverseStmt(S->getInit())) 672 return false; 673 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 674 return false; 675 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 676 return false; 677 if (S->getBody() && !TraverseStmt(S->getBody())) 678 return false; 679 return true; 680 } 681 682 bool TraverseStmt(Stmt *S) { 683 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 684 // We want to consume the semicolon, make sure SimpleDeclaration does not. 685 for (auto *D : DS->decls()) 686 Builder.noticeDeclWithoutSemicolon(D); 687 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 688 // Do not recurse into subexpressions. 689 // We do not have syntax trees for expressions yet, so we only want to see 690 // the first top-level expression. 691 return WalkUpFromExpr(E->IgnoreImplicit()); 692 } 693 return RecursiveASTVisitor::TraverseStmt(S); 694 } 695 696 // Some expressions are not yet handled by syntax trees. 697 bool WalkUpFromExpr(Expr *E) { 698 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 699 Builder.foldNode(Builder.getExprRange(E), 700 new (allocator()) syntax::UnknownExpression, E); 701 return true; 702 } 703 704 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 705 auto Tokens = Builder.getDeclRange(S); 706 if (Tokens.front().kind() == tok::coloncolon) { 707 // Handle nested namespace definitions. Those start at '::' token, e.g. 708 // namespace a^::b {} 709 // FIXME: build corresponding nodes for the name of this namespace. 710 return true; 711 } 712 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 713 return true; 714 } 715 716 bool TraverseParenTypeLoc(ParenTypeLoc L) { 717 // We reverse order of traversal to get the proper syntax structure. 718 if (!WalkUpFromParenTypeLoc(L)) 719 return false; 720 return TraverseTypeLoc(L.getInnerLoc()); 721 } 722 723 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 724 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 725 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 726 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 727 new (allocator()) syntax::ParenDeclarator, L); 728 return true; 729 } 730 731 // Declarator chunks, they are produced by type locs and some clang::Decls. 732 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 733 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 734 Builder.markExprChild(L.getSizeExpr(), 735 syntax::NodeRole::ArraySubscript_sizeExpression); 736 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 737 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 738 new (allocator()) syntax::ArraySubscript, L); 739 return true; 740 } 741 742 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 743 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 744 for (auto *P : L.getParams()) 745 Builder.markDelayedChild( 746 Builder.getDeclRange(P), 747 syntax::NodeRole::ParametersAndQualifiers_parameter); 748 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 749 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 750 new (allocator()) syntax::ParametersAndQualifiers, L); 751 return true; 752 } 753 754 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 755 if (!L.getTypePtr()->hasTrailingReturn()) 756 return WalkUpFromFunctionTypeLoc(L); 757 758 auto TrailingReturnTokens = BuildTrailingReturn(L); 759 // Finish building the node for parameters. 760 Builder.markChild(TrailingReturnTokens, 761 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 762 return WalkUpFromFunctionTypeLoc(L); 763 } 764 765 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 766 auto SR = L.getLocalSourceRange(); 767 Builder.foldNode(Builder.getRange(SR), 768 new (allocator()) syntax::MemberPointer, L); 769 return true; 770 } 771 772 // The code below is very regular, it could even be generated with some 773 // preprocessor magic. We merely assign roles to the corresponding children 774 // and fold resulting nodes. 775 bool WalkUpFromDeclStmt(DeclStmt *S) { 776 Builder.foldNode(Builder.getStmtRange(S), 777 new (allocator()) syntax::DeclarationStatement, S); 778 return true; 779 } 780 781 bool WalkUpFromNullStmt(NullStmt *S) { 782 Builder.foldNode(Builder.getStmtRange(S), 783 new (allocator()) syntax::EmptyStatement, S); 784 return true; 785 } 786 787 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 788 Builder.markChildToken(S->getSwitchLoc(), 789 syntax::NodeRole::IntroducerKeyword); 790 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 791 Builder.foldNode(Builder.getStmtRange(S), 792 new (allocator()) syntax::SwitchStatement, S); 793 return true; 794 } 795 796 bool WalkUpFromCaseStmt(CaseStmt *S) { 797 Builder.markChildToken(S->getKeywordLoc(), 798 syntax::NodeRole::IntroducerKeyword); 799 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 800 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 801 Builder.foldNode(Builder.getStmtRange(S), 802 new (allocator()) syntax::CaseStatement, S); 803 return true; 804 } 805 806 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 807 Builder.markChildToken(S->getKeywordLoc(), 808 syntax::NodeRole::IntroducerKeyword); 809 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 810 Builder.foldNode(Builder.getStmtRange(S), 811 new (allocator()) syntax::DefaultStatement, S); 812 return true; 813 } 814 815 bool WalkUpFromIfStmt(IfStmt *S) { 816 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 817 Builder.markStmtChild(S->getThen(), 818 syntax::NodeRole::IfStatement_thenStatement); 819 Builder.markChildToken(S->getElseLoc(), 820 syntax::NodeRole::IfStatement_elseKeyword); 821 Builder.markStmtChild(S->getElse(), 822 syntax::NodeRole::IfStatement_elseStatement); 823 Builder.foldNode(Builder.getStmtRange(S), 824 new (allocator()) syntax::IfStatement, S); 825 return true; 826 } 827 828 bool WalkUpFromForStmt(ForStmt *S) { 829 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 830 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 831 Builder.foldNode(Builder.getStmtRange(S), 832 new (allocator()) syntax::ForStatement, S); 833 return true; 834 } 835 836 bool WalkUpFromWhileStmt(WhileStmt *S) { 837 Builder.markChildToken(S->getWhileLoc(), 838 syntax::NodeRole::IntroducerKeyword); 839 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 840 Builder.foldNode(Builder.getStmtRange(S), 841 new (allocator()) syntax::WhileStatement, S); 842 return true; 843 } 844 845 bool WalkUpFromContinueStmt(ContinueStmt *S) { 846 Builder.markChildToken(S->getContinueLoc(), 847 syntax::NodeRole::IntroducerKeyword); 848 Builder.foldNode(Builder.getStmtRange(S), 849 new (allocator()) syntax::ContinueStatement, S); 850 return true; 851 } 852 853 bool WalkUpFromBreakStmt(BreakStmt *S) { 854 Builder.markChildToken(S->getBreakLoc(), 855 syntax::NodeRole::IntroducerKeyword); 856 Builder.foldNode(Builder.getStmtRange(S), 857 new (allocator()) syntax::BreakStatement, S); 858 return true; 859 } 860 861 bool WalkUpFromReturnStmt(ReturnStmt *S) { 862 Builder.markChildToken(S->getReturnLoc(), 863 syntax::NodeRole::IntroducerKeyword); 864 Builder.markExprChild(S->getRetValue(), 865 syntax::NodeRole::ReturnStatement_value); 866 Builder.foldNode(Builder.getStmtRange(S), 867 new (allocator()) syntax::ReturnStatement, S); 868 return true; 869 } 870 871 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 872 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 873 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 874 Builder.foldNode(Builder.getStmtRange(S), 875 new (allocator()) syntax::RangeBasedForStatement, S); 876 return true; 877 } 878 879 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 880 Builder.foldNode(Builder.getDeclRange(S), 881 new (allocator()) syntax::EmptyDeclaration, S); 882 return true; 883 } 884 885 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 886 Builder.markExprChild(S->getAssertExpr(), 887 syntax::NodeRole::StaticAssertDeclaration_condition); 888 Builder.markExprChild(S->getMessage(), 889 syntax::NodeRole::StaticAssertDeclaration_message); 890 Builder.foldNode(Builder.getDeclRange(S), 891 new (allocator()) syntax::StaticAssertDeclaration, S); 892 return true; 893 } 894 895 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 896 Builder.foldNode(Builder.getDeclRange(S), 897 new (allocator()) syntax::LinkageSpecificationDeclaration, 898 S); 899 return true; 900 } 901 902 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 903 Builder.foldNode(Builder.getDeclRange(S), 904 new (allocator()) syntax::NamespaceAliasDefinition, S); 905 return true; 906 } 907 908 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 909 Builder.foldNode(Builder.getDeclRange(S), 910 new (allocator()) syntax::UsingNamespaceDirective, S); 911 return true; 912 } 913 914 bool WalkUpFromUsingDecl(UsingDecl *S) { 915 Builder.foldNode(Builder.getDeclRange(S), 916 new (allocator()) syntax::UsingDeclaration, S); 917 return true; 918 } 919 920 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 921 Builder.foldNode(Builder.getDeclRange(S), 922 new (allocator()) syntax::UsingDeclaration, S); 923 return true; 924 } 925 926 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 927 Builder.foldNode(Builder.getDeclRange(S), 928 new (allocator()) syntax::UsingDeclaration, S); 929 return true; 930 } 931 932 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 933 Builder.foldNode(Builder.getDeclRange(S), 934 new (allocator()) syntax::TypeAliasDeclaration, S); 935 return true; 936 } 937 938 private: 939 /// Returns the range of the built node. 940 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 941 assert(L.getTypePtr()->hasTrailingReturn()); 942 943 auto ReturnedType = L.getReturnLoc(); 944 // Build node for the declarator, if any. 945 auto ReturnDeclaratorRange = 946 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 947 /*Name=*/SourceLocation(), 948 /*Initializer=*/SourceLocation()); 949 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 950 if (ReturnDeclaratorRange.isValid()) { 951 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 952 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 953 ReturnDeclarator, nullptr); 954 } 955 956 // Build node for trailing return type. 957 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 958 const auto *Arrow = Return.begin() - 1; 959 assert(Arrow->kind() == tok::arrow); 960 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 961 Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow); 962 if (ReturnDeclarator) 963 Builder.markChild(ReturnDeclarator, 964 syntax::NodeRole::TrailingReturnType_declarator); 965 auto *R = new (allocator()) syntax::TrailingReturnType; 966 Builder.foldNode(Tokens, R, nullptr); 967 return R; 968 } 969 970 void foldExplicitTemplateInstantiation( 971 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 972 const syntax::Token *TemplateKW, 973 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 974 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 975 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 976 Builder.markChildToken( 977 ExternKW, 978 syntax::NodeRole::ExplicitTemplateInstantiation_externKeyword); 979 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 980 Builder.markChild( 981 InnerDeclaration, 982 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 983 Builder.foldNode( 984 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 985 } 986 987 syntax::TemplateDeclaration *foldTemplateDeclaration( 988 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 989 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 990 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 991 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 992 Builder.markMaybeDelayedChild( 993 TemplatedDeclaration, 994 syntax::NodeRole::TemplateDeclaration_declaration); 995 996 auto *N = new (allocator()) syntax::TemplateDeclaration; 997 Builder.foldNode(Range, N, From); 998 return N; 999 } 1000 1001 /// A small helper to save some typing. 1002 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1003 1004 syntax::TreeBuilder &Builder; 1005 const LangOptions &LangOpts; 1006 }; 1007 } // namespace 1008 1009 void syntax::TreeBuilder::noticeDeclRange(llvm::ArrayRef<syntax::Token> Range) { 1010 if (Pending.extendDelayedFold(Range)) 1011 return; 1012 Pending.foldChildrenDelayed(Range, 1013 new (allocator()) syntax::SimpleDeclaration); 1014 } 1015 1016 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1017 DeclsWithoutSemicolons.insert(D); 1018 } 1019 1020 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1021 if (Loc.isInvalid()) 1022 return; 1023 Pending.assignRole(*findToken(Loc), Role); 1024 } 1025 1026 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1027 if (!T) 1028 return; 1029 Pending.assignRole(*T, R); 1030 } 1031 1032 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1033 assert(N); 1034 setRole(N, R); 1035 } 1036 1037 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1038 auto *SN = Mapping.find(N); 1039 assert(SN != nullptr); 1040 setRole(SN, R); 1041 } 1042 1043 void syntax::TreeBuilder::markDelayedChild(llvm::ArrayRef<syntax::Token> Range, 1044 NodeRole R) { 1045 Pending.assignRoleDelayed(Range, R); 1046 } 1047 1048 void syntax::TreeBuilder::markMaybeDelayedChild( 1049 llvm::ArrayRef<syntax::Token> Range, NodeRole R) { 1050 Pending.assignRoleMaybeDelayed(Range, R); 1051 } 1052 1053 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1054 if (!Child) 1055 return; 1056 1057 syntax::Tree *ChildNode = Mapping.find(Child); 1058 assert(ChildNode != nullptr); 1059 1060 // This is an expression in a statement position, consume the trailing 1061 // semicolon and form an 'ExpressionStatement' node. 1062 if (isa<Expr>(Child)) { 1063 setRole(ChildNode, NodeRole::ExpressionStatement_expression); 1064 ChildNode = new (allocator()) syntax::ExpressionStatement; 1065 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1066 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1067 } 1068 setRole(ChildNode, Role); 1069 } 1070 1071 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1072 if (!Child) 1073 return; 1074 Child = Child->IgnoreImplicit(); 1075 1076 syntax::Tree *ChildNode = Mapping.find(Child); 1077 assert(ChildNode != nullptr); 1078 setRole(ChildNode, Role); 1079 } 1080 1081 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1082 if (L.isInvalid()) 1083 return nullptr; 1084 auto It = LocationToToken.find(L.getRawEncoding()); 1085 assert(It != LocationToToken.end()); 1086 return It->second; 1087 } 1088 1089 syntax::TranslationUnit * 1090 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1091 TreeBuilder Builder(A); 1092 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1093 return std::move(Builder).finalize(); 1094 } 1095