1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/RecursiveASTVisitor.h" 15 #include "clang/AST/Stmt.h" 16 #include "clang/AST/TypeLoc.h" 17 #include "clang/AST/TypeLocVisitor.h" 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/Specifiers.h" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Lex/Lexer.h" 24 #include "clang/Tooling/Syntax/Nodes.h" 25 #include "clang/Tooling/Syntax/Tokens.h" 26 #include "clang/Tooling/Syntax/Tree.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/PointerUnion.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/ADT/ScopeExit.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/Support/Allocator.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cstddef> 40 #include <map> 41 42 using namespace clang; 43 44 LLVM_ATTRIBUTE_UNUSED 45 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 46 47 namespace { 48 /// Get start location of the Declarator from the TypeLoc. 49 /// E.g.: 50 /// loc of `(` in `int (a)` 51 /// loc of `*` in `int *(a)` 52 /// loc of the first `(` in `int (*a)(int)` 53 /// loc of the `*` in `int *(a)(int)` 54 /// loc of the first `*` in `const int *const *volatile a;` 55 /// 56 /// It is non-trivial to get the start location because TypeLocs are stored 57 /// inside out. In the example above `*volatile` is the TypeLoc returned 58 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 59 /// returns. 60 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 61 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 62 auto L = Visit(T.getInnerLoc()); 63 if (L.isValid()) 64 return L; 65 return T.getLParenLoc(); 66 } 67 68 // Types spelled in the prefix part of the declarator. 69 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 70 return HandlePointer(T); 71 } 72 73 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 74 return HandlePointer(T); 75 } 76 77 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 78 return HandlePointer(T); 79 } 80 81 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 82 return HandlePointer(T); 83 } 84 85 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 86 return HandlePointer(T); 87 } 88 89 // All other cases are not important, as they are either part of declaration 90 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 91 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 92 // declarator themselves, but their underlying type can. 93 SourceLocation VisitTypeLoc(TypeLoc T) { 94 auto N = T.getNextTypeLoc(); 95 if (!N) 96 return SourceLocation(); 97 return Visit(N); 98 } 99 100 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 101 if (T.getTypePtr()->hasTrailingReturn()) 102 return SourceLocation(); // avoid recursing into the suffix of declarator. 103 return VisitTypeLoc(T); 104 } 105 106 private: 107 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 108 auto L = Visit(T.getPointeeLoc()); 109 if (L.isValid()) 110 return L; 111 return T.getLocalSourceRange().getBegin(); 112 } 113 }; 114 } // namespace 115 116 /// Gets the range of declarator as defined by the C++ grammar. E.g. 117 /// `int a;` -> range of `a`, 118 /// `int *a;` -> range of `*a`, 119 /// `int a[10];` -> range of `a[10]`, 120 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 121 /// `int *a = nullptr` -> range of `*a = nullptr`. 122 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 123 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 124 SourceLocation Name, 125 SourceRange Initializer) { 126 SourceLocation Start = GetStartLoc().Visit(T); 127 SourceLocation End = T.getSourceRange().getEnd(); 128 assert(End.isValid()); 129 if (Name.isValid()) { 130 if (Start.isInvalid()) 131 Start = Name; 132 if (SM.isBeforeInTranslationUnit(End, Name)) 133 End = Name; 134 } 135 if (Initializer.isValid()) { 136 auto InitializerEnd = Initializer.getEnd(); 137 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd); 138 End = InitializerEnd; 139 } 140 return SourceRange(Start, End); 141 } 142 143 namespace { 144 /// All AST hierarchy roots that can be represented as pointers. 145 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 146 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 147 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 148 /// FIXME: expose this as public API. 149 class ASTToSyntaxMapping { 150 public: 151 void add(ASTPtr From, syntax::Tree *To) { 152 assert(To != nullptr); 153 assert(!From.isNull()); 154 155 bool Added = Nodes.insert({From, To}).second; 156 (void)Added; 157 assert(Added && "mapping added twice"); 158 } 159 160 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 161 162 private: 163 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 164 }; 165 } // namespace 166 167 /// A helper class for constructing the syntax tree while traversing a clang 168 /// AST. 169 /// 170 /// At each point of the traversal we maintain a list of pending nodes. 171 /// Initially all tokens are added as pending nodes. When processing a clang AST 172 /// node, the clients need to: 173 /// - create a corresponding syntax node, 174 /// - assign roles to all pending child nodes with 'markChild' and 175 /// 'markChildToken', 176 /// - replace the child nodes with the new syntax node in the pending list 177 /// with 'foldNode'. 178 /// 179 /// Note that all children are expected to be processed when building a node. 180 /// 181 /// Call finalize() to finish building the tree and consume the root node. 182 class syntax::TreeBuilder { 183 public: 184 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 185 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 186 LocationToToken.insert({T.location().getRawEncoding(), &T}); 187 } 188 189 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 190 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 191 192 /// Populate children for \p New node, assuming it covers tokens from \p 193 /// Range. 194 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 195 ASTPtr From) { 196 assert(New); 197 Pending.foldChildren(Arena, Range, New); 198 if (From) 199 Mapping.add(From, New); 200 } 201 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 202 TypeLoc L) { 203 // FIXME: add mapping for TypeLocs 204 foldNode(Range, New, nullptr); 205 } 206 207 /// Notifies that we should not consume trailing semicolon when computing 208 /// token range of \p D. 209 void noticeDeclWithoutSemicolon(Decl *D); 210 211 /// Mark the \p Child node with a corresponding \p Role. All marked children 212 /// should be consumed by foldNode. 213 /// When called on expressions (clang::Expr is derived from clang::Stmt), 214 /// wraps expressions into expression statement. 215 void markStmtChild(Stmt *Child, NodeRole Role); 216 /// Should be called for expressions in non-statement position to avoid 217 /// wrapping into expression statement. 218 void markExprChild(Expr *Child, NodeRole Role); 219 /// Set role for a token starting at \p Loc. 220 void markChildToken(SourceLocation Loc, NodeRole R); 221 /// Set role for \p T. 222 void markChildToken(const syntax::Token *T, NodeRole R); 223 224 /// Set role for \p N. 225 void markChild(syntax::Node *N, NodeRole R); 226 /// Set role for the syntax node matching \p N. 227 void markChild(ASTPtr N, NodeRole R); 228 229 /// Finish building the tree and consume the root node. 230 syntax::TranslationUnit *finalize() && { 231 auto Tokens = Arena.tokenBuffer().expandedTokens(); 232 assert(!Tokens.empty()); 233 assert(Tokens.back().kind() == tok::eof); 234 235 // Build the root of the tree, consuming all the children. 236 Pending.foldChildren(Arena, Tokens.drop_back(), 237 new (Arena.allocator()) syntax::TranslationUnit); 238 239 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 240 TU->assertInvariantsRecursive(); 241 return TU; 242 } 243 244 /// Finds a token starting at \p L. The token must exist if \p L is valid. 245 const syntax::Token *findToken(SourceLocation L) const; 246 247 /// Finds the syntax tokens corresponding to the \p SourceRange. 248 llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { 249 assert(Range.isValid()); 250 return getRange(Range.getBegin(), Range.getEnd()); 251 } 252 253 /// Finds the syntax tokens corresponding to the passed source locations. 254 /// \p First is the start position of the first token and \p Last is the start 255 /// position of the last token. 256 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 257 SourceLocation Last) const { 258 assert(First.isValid()); 259 assert(Last.isValid()); 260 assert(First == Last || 261 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 262 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 263 } 264 265 llvm::ArrayRef<syntax::Token> 266 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 267 auto Tokens = getRange(D->getSourceRange()); 268 return maybeAppendSemicolon(Tokens, D); 269 } 270 271 /// Returns true if \p D is the last declarator in a chain and is thus 272 /// reponsible for creating SimpleDeclaration for the whole chain. 273 template <class T> 274 bool isResponsibleForCreatingDeclaration(const T *D) const { 275 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 276 std::is_base_of<TypedefNameDecl, T>::value), 277 "only DeclaratorDecl and TypedefNameDecl are supported."); 278 279 const Decl *Next = D->getNextDeclInContext(); 280 281 // There's no next sibling, this one is responsible. 282 if (Next == nullptr) { 283 return true; 284 } 285 const auto *NextT = llvm::dyn_cast<T>(Next); 286 287 // Next sibling is not the same type, this one is responsible. 288 if (NextT == nullptr) { 289 return true; 290 } 291 // Next sibling doesn't begin at the same loc, it must be a different 292 // declaration, so this declarator is responsible. 293 if (NextT->getBeginLoc() != D->getBeginLoc()) { 294 return true; 295 } 296 297 // NextT is a member of the same declaration, and we need the last member to 298 // create declaration. This one is not responsible. 299 return false; 300 } 301 302 llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 303 llvm::ArrayRef<clang::syntax::Token> Tokens; 304 // We want to drop the template parameters for specializations. 305 if (const auto *S = llvm::dyn_cast<TagDecl>(D)) 306 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 307 else 308 Tokens = getRange(D->getSourceRange()); 309 return maybeAppendSemicolon(Tokens, D); 310 } 311 312 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 313 return getRange(E->getSourceRange()); 314 } 315 316 /// Find the adjusted range for the statement, consuming the trailing 317 /// semicolon when needed. 318 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 319 auto Tokens = getRange(S->getSourceRange()); 320 if (isa<CompoundStmt>(S)) 321 return Tokens; 322 323 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 324 // all statements that end with those. Consume this semicolon here. 325 if (Tokens.back().kind() == tok::semi) 326 return Tokens; 327 return withTrailingSemicolon(Tokens); 328 } 329 330 private: 331 llvm::ArrayRef<syntax::Token> 332 maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, 333 const Decl *D) const { 334 if (llvm::isa<NamespaceDecl>(D)) 335 return Tokens; 336 if (DeclsWithoutSemicolons.count(D)) 337 return Tokens; 338 // FIXME: do not consume trailing semicolon on function definitions. 339 // Most declarations own a semicolon in syntax trees, but not in clang AST. 340 return withTrailingSemicolon(Tokens); 341 } 342 343 llvm::ArrayRef<syntax::Token> 344 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 345 assert(!Tokens.empty()); 346 assert(Tokens.back().kind() != tok::eof); 347 // We never consume 'eof', so looking at the next token is ok. 348 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 349 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 350 return Tokens; 351 } 352 353 void setRole(syntax::Node *N, NodeRole R) { 354 assert(N->role() == NodeRole::Detached); 355 N->setRole(R); 356 } 357 358 /// A collection of trees covering the input tokens. 359 /// When created, each tree corresponds to a single token in the file. 360 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 361 /// node and update the list of trees accordingly. 362 /// 363 /// Ensures that added nodes properly nest and cover the whole token stream. 364 struct Forest { 365 Forest(syntax::Arena &A) { 366 assert(!A.tokenBuffer().expandedTokens().empty()); 367 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 368 // Create all leaf nodes. 369 // Note that we do not have 'eof' in the tree. 370 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 371 auto *L = new (A.allocator()) syntax::Leaf(&T); 372 L->Original = true; 373 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 374 Trees.insert(Trees.end(), {&T, L}); 375 } 376 } 377 378 void assignRole(llvm::ArrayRef<syntax::Token> Range, 379 syntax::NodeRole Role) { 380 assert(!Range.empty()); 381 auto It = Trees.lower_bound(Range.begin()); 382 assert(It != Trees.end() && "no node found"); 383 assert(It->first == Range.begin() && "no child with the specified range"); 384 assert((std::next(It) == Trees.end() || 385 std::next(It)->first == Range.end()) && 386 "no child with the specified range"); 387 assert(It->second->role() == NodeRole::Detached && 388 "re-assigning role for a child"); 389 It->second->setRole(Role); 390 } 391 392 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 393 void foldChildren(const syntax::Arena &A, 394 llvm::ArrayRef<syntax::Token> Tokens, 395 syntax::Tree *Node) { 396 // Attach children to `Node`. 397 assert(Node->firstChild() == nullptr && "node already has children"); 398 399 auto *FirstToken = Tokens.begin(); 400 auto BeginChildren = Trees.lower_bound(FirstToken); 401 402 assert((BeginChildren == Trees.end() || 403 BeginChildren->first == FirstToken) && 404 "fold crosses boundaries of existing subtrees"); 405 auto EndChildren = Trees.lower_bound(Tokens.end()); 406 assert( 407 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 408 "fold crosses boundaries of existing subtrees"); 409 410 // We need to go in reverse order, because we can only prepend. 411 for (auto It = EndChildren; It != BeginChildren; --It) { 412 auto *C = std::prev(It)->second; 413 if (C->role() == NodeRole::Detached) 414 C->setRole(NodeRole::Unknown); 415 Node->prependChildLowLevel(C); 416 } 417 418 // Mark that this node came from the AST and is backed by the source code. 419 Node->Original = true; 420 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 421 422 Trees.erase(BeginChildren, EndChildren); 423 Trees.insert({FirstToken, Node}); 424 } 425 426 // EXPECTS: all tokens were consumed and are owned by a single root node. 427 syntax::Node *finalize() && { 428 assert(Trees.size() == 1); 429 auto *Root = Trees.begin()->second; 430 Trees = {}; 431 return Root; 432 } 433 434 std::string str(const syntax::Arena &A) const { 435 std::string R; 436 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 437 unsigned CoveredTokens = 438 It != Trees.end() 439 ? (std::next(It)->first - It->first) 440 : A.tokenBuffer().expandedTokens().end() - It->first; 441 442 R += std::string(llvm::formatv( 443 "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 444 It->first->text(A.sourceManager()), CoveredTokens)); 445 R += It->second->dump(A); 446 } 447 return R; 448 } 449 450 private: 451 /// Maps from the start token to a subtree starting at that token. 452 /// Keys in the map are pointers into the array of expanded tokens, so 453 /// pointer order corresponds to the order of preprocessor tokens. 454 std::map<const syntax::Token *, syntax::Node *> Trees; 455 }; 456 457 /// For debugging purposes. 458 std::string str() { return Pending.str(Arena); } 459 460 syntax::Arena &Arena; 461 /// To quickly find tokens by their start location. 462 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 463 LocationToToken; 464 Forest Pending; 465 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 466 ASTToSyntaxMapping Mapping; 467 }; 468 469 namespace { 470 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 471 public: 472 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 473 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 474 475 bool shouldTraversePostOrder() const { return true; } 476 477 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 478 return processDeclaratorAndDeclaration(DD); 479 } 480 481 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 482 return processDeclaratorAndDeclaration(TD); 483 } 484 485 bool VisitDecl(Decl *D) { 486 assert(!D->isImplicit()); 487 Builder.foldNode(Builder.getDeclarationRange(D), 488 new (allocator()) syntax::UnknownDeclaration(), D); 489 return true; 490 } 491 492 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 493 // override Traverse. 494 // FIXME: make RAV call WalkUpFrom* instead. 495 bool 496 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 497 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 498 return false; 499 if (C->isExplicitSpecialization()) 500 return true; // we are only interested in explicit instantiations. 501 auto *Declaration = 502 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 503 foldExplicitTemplateInstantiation( 504 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 505 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 506 return true; 507 } 508 509 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 510 foldTemplateDeclaration( 511 Builder.getDeclarationRange(S), 512 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 513 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 514 return true; 515 } 516 517 bool WalkUpFromTagDecl(TagDecl *C) { 518 // FIXME: build the ClassSpecifier node. 519 if (!C->isFreeStanding()) { 520 assert(C->getNumTemplateParameterLists() == 0); 521 return true; 522 } 523 handleFreeStandingTagDecl(C); 524 return true; 525 } 526 527 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 528 assert(C->isFreeStanding()); 529 // Class is a declaration specifier and needs a spanning declaration node. 530 auto DeclarationRange = Builder.getDeclarationRange(C); 531 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 532 Builder.foldNode(DeclarationRange, Result, nullptr); 533 534 // Build TemplateDeclaration nodes if we had template parameters. 535 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 536 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 537 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 538 Result = 539 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 540 DeclarationRange = R; 541 }; 542 if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 543 ConsumeTemplateParameters(*S->getTemplateParameters()); 544 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 545 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 546 return Result; 547 } 548 549 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 550 // We do not want to call VisitDecl(), the declaration for translation 551 // unit is built by finalize(). 552 return true; 553 } 554 555 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 556 using NodeRole = syntax::NodeRole; 557 558 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 559 for (auto *Child : S->body()) 560 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 561 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 562 563 Builder.foldNode(Builder.getStmtRange(S), 564 new (allocator()) syntax::CompoundStatement, S); 565 return true; 566 } 567 568 // Some statements are not yet handled by syntax trees. 569 bool WalkUpFromStmt(Stmt *S) { 570 Builder.foldNode(Builder.getStmtRange(S), 571 new (allocator()) syntax::UnknownStatement, S); 572 return true; 573 } 574 575 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 576 // We override to traverse range initializer as VarDecl. 577 // RAV traverses it as a statement, we produce invalid node kinds in that 578 // case. 579 // FIXME: should do this in RAV instead? 580 if (S->getInit() && !TraverseStmt(S->getInit())) 581 return false; 582 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 583 return false; 584 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 585 return false; 586 if (S->getBody() && !TraverseStmt(S->getBody())) 587 return false; 588 return true; 589 } 590 591 bool TraverseStmt(Stmt *S) { 592 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 593 // We want to consume the semicolon, make sure SimpleDeclaration does not. 594 for (auto *D : DS->decls()) 595 Builder.noticeDeclWithoutSemicolon(D); 596 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 597 // Do not recurse into subexpressions. 598 // We do not have syntax trees for expressions yet, so we only want to see 599 // the first top-level expression. 600 return WalkUpFromExpr(E->IgnoreImplicit()); 601 } 602 return RecursiveASTVisitor::TraverseStmt(S); 603 } 604 605 // Some expressions are not yet handled by syntax trees. 606 bool WalkUpFromExpr(Expr *E) { 607 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 608 Builder.foldNode(Builder.getExprRange(E), 609 new (allocator()) syntax::UnknownExpression, E); 610 return true; 611 } 612 613 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 614 auto Tokens = Builder.getDeclarationRange(S); 615 if (Tokens.front().kind() == tok::coloncolon) { 616 // Handle nested namespace definitions. Those start at '::' token, e.g. 617 // namespace a^::b {} 618 // FIXME: build corresponding nodes for the name of this namespace. 619 return true; 620 } 621 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 622 return true; 623 } 624 625 bool TraverseParenTypeLoc(ParenTypeLoc L) { 626 // We reverse order of traversal to get the proper syntax structure. 627 if (!WalkUpFromParenTypeLoc(L)) 628 return false; 629 return TraverseTypeLoc(L.getInnerLoc()); 630 } 631 632 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 633 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 634 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 635 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 636 new (allocator()) syntax::ParenDeclarator, L); 637 return true; 638 } 639 640 // Declarator chunks, they are produced by type locs and some clang::Decls. 641 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 642 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 643 Builder.markExprChild(L.getSizeExpr(), 644 syntax::NodeRole::ArraySubscript_sizeExpression); 645 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 646 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 647 new (allocator()) syntax::ArraySubscript, L); 648 return true; 649 } 650 651 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 652 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 653 for (auto *P : L.getParams()) { 654 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 655 } 656 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 657 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 658 new (allocator()) syntax::ParametersAndQualifiers, L); 659 return true; 660 } 661 662 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 663 if (!L.getTypePtr()->hasTrailingReturn()) 664 return WalkUpFromFunctionTypeLoc(L); 665 666 auto *TrailingReturnTokens = BuildTrailingReturn(L); 667 // Finish building the node for parameters. 668 Builder.markChild(TrailingReturnTokens, 669 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 670 return WalkUpFromFunctionTypeLoc(L); 671 } 672 673 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 674 auto SR = L.getLocalSourceRange(); 675 Builder.foldNode(Builder.getRange(SR), 676 new (allocator()) syntax::MemberPointer, L); 677 return true; 678 } 679 680 // The code below is very regular, it could even be generated with some 681 // preprocessor magic. We merely assign roles to the corresponding children 682 // and fold resulting nodes. 683 bool WalkUpFromDeclStmt(DeclStmt *S) { 684 Builder.foldNode(Builder.getStmtRange(S), 685 new (allocator()) syntax::DeclarationStatement, S); 686 return true; 687 } 688 689 bool WalkUpFromNullStmt(NullStmt *S) { 690 Builder.foldNode(Builder.getStmtRange(S), 691 new (allocator()) syntax::EmptyStatement, S); 692 return true; 693 } 694 695 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 696 Builder.markChildToken(S->getSwitchLoc(), 697 syntax::NodeRole::IntroducerKeyword); 698 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 699 Builder.foldNode(Builder.getStmtRange(S), 700 new (allocator()) syntax::SwitchStatement, S); 701 return true; 702 } 703 704 bool WalkUpFromCaseStmt(CaseStmt *S) { 705 Builder.markChildToken(S->getKeywordLoc(), 706 syntax::NodeRole::IntroducerKeyword); 707 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 708 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 709 Builder.foldNode(Builder.getStmtRange(S), 710 new (allocator()) syntax::CaseStatement, S); 711 return true; 712 } 713 714 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 715 Builder.markChildToken(S->getKeywordLoc(), 716 syntax::NodeRole::IntroducerKeyword); 717 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 718 Builder.foldNode(Builder.getStmtRange(S), 719 new (allocator()) syntax::DefaultStatement, S); 720 return true; 721 } 722 723 bool WalkUpFromIfStmt(IfStmt *S) { 724 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 725 Builder.markStmtChild(S->getThen(), 726 syntax::NodeRole::IfStatement_thenStatement); 727 Builder.markChildToken(S->getElseLoc(), 728 syntax::NodeRole::IfStatement_elseKeyword); 729 Builder.markStmtChild(S->getElse(), 730 syntax::NodeRole::IfStatement_elseStatement); 731 Builder.foldNode(Builder.getStmtRange(S), 732 new (allocator()) syntax::IfStatement, S); 733 return true; 734 } 735 736 bool WalkUpFromForStmt(ForStmt *S) { 737 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 738 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 739 Builder.foldNode(Builder.getStmtRange(S), 740 new (allocator()) syntax::ForStatement, S); 741 return true; 742 } 743 744 bool WalkUpFromWhileStmt(WhileStmt *S) { 745 Builder.markChildToken(S->getWhileLoc(), 746 syntax::NodeRole::IntroducerKeyword); 747 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 748 Builder.foldNode(Builder.getStmtRange(S), 749 new (allocator()) syntax::WhileStatement, S); 750 return true; 751 } 752 753 bool WalkUpFromContinueStmt(ContinueStmt *S) { 754 Builder.markChildToken(S->getContinueLoc(), 755 syntax::NodeRole::IntroducerKeyword); 756 Builder.foldNode(Builder.getStmtRange(S), 757 new (allocator()) syntax::ContinueStatement, S); 758 return true; 759 } 760 761 bool WalkUpFromBreakStmt(BreakStmt *S) { 762 Builder.markChildToken(S->getBreakLoc(), 763 syntax::NodeRole::IntroducerKeyword); 764 Builder.foldNode(Builder.getStmtRange(S), 765 new (allocator()) syntax::BreakStatement, S); 766 return true; 767 } 768 769 bool WalkUpFromReturnStmt(ReturnStmt *S) { 770 Builder.markChildToken(S->getReturnLoc(), 771 syntax::NodeRole::IntroducerKeyword); 772 Builder.markExprChild(S->getRetValue(), 773 syntax::NodeRole::ReturnStatement_value); 774 Builder.foldNode(Builder.getStmtRange(S), 775 new (allocator()) syntax::ReturnStatement, S); 776 return true; 777 } 778 779 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 780 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 781 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 782 Builder.foldNode(Builder.getStmtRange(S), 783 new (allocator()) syntax::RangeBasedForStatement, S); 784 return true; 785 } 786 787 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 788 Builder.foldNode(Builder.getDeclarationRange(S), 789 new (allocator()) syntax::EmptyDeclaration, S); 790 return true; 791 } 792 793 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 794 Builder.markExprChild(S->getAssertExpr(), 795 syntax::NodeRole::StaticAssertDeclaration_condition); 796 Builder.markExprChild(S->getMessage(), 797 syntax::NodeRole::StaticAssertDeclaration_message); 798 Builder.foldNode(Builder.getDeclarationRange(S), 799 new (allocator()) syntax::StaticAssertDeclaration, S); 800 return true; 801 } 802 803 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 804 Builder.foldNode(Builder.getDeclarationRange(S), 805 new (allocator()) syntax::LinkageSpecificationDeclaration, 806 S); 807 return true; 808 } 809 810 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 811 Builder.foldNode(Builder.getDeclarationRange(S), 812 new (allocator()) syntax::NamespaceAliasDefinition, S); 813 return true; 814 } 815 816 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 817 Builder.foldNode(Builder.getDeclarationRange(S), 818 new (allocator()) syntax::UsingNamespaceDirective, S); 819 return true; 820 } 821 822 bool WalkUpFromUsingDecl(UsingDecl *S) { 823 Builder.foldNode(Builder.getDeclarationRange(S), 824 new (allocator()) syntax::UsingDeclaration, S); 825 return true; 826 } 827 828 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 829 Builder.foldNode(Builder.getDeclarationRange(S), 830 new (allocator()) syntax::UsingDeclaration, S); 831 return true; 832 } 833 834 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 835 Builder.foldNode(Builder.getDeclarationRange(S), 836 new (allocator()) syntax::UsingDeclaration, S); 837 return true; 838 } 839 840 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 841 Builder.foldNode(Builder.getDeclarationRange(S), 842 new (allocator()) syntax::TypeAliasDeclaration, S); 843 return true; 844 } 845 846 private: 847 template <class T> SourceLocation getQualifiedNameStart(T *D) { 848 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 849 std::is_base_of<TypedefNameDecl, T>::value), 850 "only DeclaratorDecl and TypedefNameDecl are supported."); 851 852 auto DN = D->getDeclName(); 853 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 854 if (IsAnonymous) 855 return SourceLocation(); 856 857 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { 858 if (DD->getQualifierLoc()) { 859 return DD->getQualifierLoc().getBeginLoc(); 860 } 861 } 862 863 return D->getLocation(); 864 } 865 866 SourceRange getInitializerRange(Decl *D) { 867 if (auto *V = llvm::dyn_cast<VarDecl>(D)) { 868 auto *I = V->getInit(); 869 // Initializers in range-based-for are not part of the declarator 870 if (I && !V->isCXXForRangeDecl()) 871 return I->getSourceRange(); 872 } 873 874 return SourceRange(); 875 } 876 877 /// Folds SimpleDeclarator node (if present) and in case this is the last 878 /// declarator in the chain it also folds SimpleDeclaration node. 879 template <class T> bool processDeclaratorAndDeclaration(T *D) { 880 SourceRange Initializer = getInitializerRange(D); 881 auto Range = getDeclaratorRange(Builder.sourceManager(), 882 D->getTypeSourceInfo()->getTypeLoc(), 883 getQualifiedNameStart(D), Initializer); 884 885 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 886 // declaration, but no declarator). 887 if (Range.getBegin().isValid()) { 888 auto *N = new (allocator()) syntax::SimpleDeclarator; 889 Builder.foldNode(Builder.getRange(Range), N, nullptr); 890 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 891 } 892 893 if (Builder.isResponsibleForCreatingDeclaration(D)) { 894 Builder.foldNode(Builder.getDeclarationRange(D), 895 new (allocator()) syntax::SimpleDeclaration, D); 896 } 897 return true; 898 } 899 900 /// Returns the range of the built node. 901 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 902 assert(L.getTypePtr()->hasTrailingReturn()); 903 904 auto ReturnedType = L.getReturnLoc(); 905 // Build node for the declarator, if any. 906 auto ReturnDeclaratorRange = 907 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 908 /*Name=*/SourceLocation(), 909 /*Initializer=*/SourceLocation()); 910 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 911 if (ReturnDeclaratorRange.isValid()) { 912 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 913 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 914 ReturnDeclarator, nullptr); 915 } 916 917 // Build node for trailing return type. 918 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 919 const auto *Arrow = Return.begin() - 1; 920 assert(Arrow->kind() == tok::arrow); 921 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 922 Builder.markChildToken(Arrow, syntax::NodeRole::TrailingReturnType_arrow); 923 if (ReturnDeclarator) 924 Builder.markChild(ReturnDeclarator, 925 syntax::NodeRole::TrailingReturnType_declarator); 926 auto *R = new (allocator()) syntax::TrailingReturnType; 927 Builder.foldNode(Tokens, R, L); 928 return R; 929 } 930 931 void foldExplicitTemplateInstantiation( 932 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 933 const syntax::Token *TemplateKW, 934 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 935 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 936 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 937 Builder.markChildToken( 938 ExternKW, 939 syntax::NodeRole::ExplicitTemplateInstantiation_externKeyword); 940 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 941 Builder.markChild( 942 InnerDeclaration, 943 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 944 Builder.foldNode( 945 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 946 } 947 948 syntax::TemplateDeclaration *foldTemplateDeclaration( 949 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 950 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 951 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 952 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 953 954 auto *N = new (allocator()) syntax::TemplateDeclaration; 955 Builder.foldNode(Range, N, From); 956 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 957 return N; 958 } 959 960 /// A small helper to save some typing. 961 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 962 963 syntax::TreeBuilder &Builder; 964 const LangOptions &LangOpts; 965 }; 966 } // namespace 967 968 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 969 DeclsWithoutSemicolons.insert(D); 970 } 971 972 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 973 if (Loc.isInvalid()) 974 return; 975 Pending.assignRole(*findToken(Loc), Role); 976 } 977 978 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 979 if (!T) 980 return; 981 Pending.assignRole(*T, R); 982 } 983 984 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 985 assert(N); 986 setRole(N, R); 987 } 988 989 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 990 auto *SN = Mapping.find(N); 991 assert(SN != nullptr); 992 setRole(SN, R); 993 } 994 995 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 996 if (!Child) 997 return; 998 999 syntax::Tree *ChildNode = Mapping.find(Child); 1000 assert(ChildNode != nullptr); 1001 1002 // This is an expression in a statement position, consume the trailing 1003 // semicolon and form an 'ExpressionStatement' node. 1004 if (isa<Expr>(Child)) { 1005 setRole(ChildNode, NodeRole::ExpressionStatement_expression); 1006 ChildNode = new (allocator()) syntax::ExpressionStatement; 1007 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1008 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1009 } 1010 setRole(ChildNode, Role); 1011 } 1012 1013 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1014 if (!Child) 1015 return; 1016 Child = Child->IgnoreImplicit(); 1017 1018 syntax::Tree *ChildNode = Mapping.find(Child); 1019 assert(ChildNode != nullptr); 1020 setRole(ChildNode, Role); 1021 } 1022 1023 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1024 if (L.isInvalid()) 1025 return nullptr; 1026 auto It = LocationToToken.find(L.getRawEncoding()); 1027 assert(It != LocationToToken.end()); 1028 return It->second; 1029 } 1030 1031 syntax::TranslationUnit * 1032 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1033 TreeBuilder Builder(A); 1034 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1035 return std::move(Builder).finalize(); 1036 } 1037