1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Tooling/Syntax/Nodes.h" 27 #include "clang/Tooling/Syntax/Tokens.h" 28 #include "clang/Tooling/Syntax/Tree.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/PointerUnion.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/ScopeExit.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/Support/Allocator.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/FormatVariadic.h" 39 #include "llvm/Support/MemoryBuffer.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cstddef> 42 #include <map> 43 44 using namespace clang; 45 46 LLVM_ATTRIBUTE_UNUSED 47 static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } 48 49 namespace { 50 /// Get start location of the Declarator from the TypeLoc. 51 /// E.g.: 52 /// loc of `(` in `int (a)` 53 /// loc of `*` in `int *(a)` 54 /// loc of the first `(` in `int (*a)(int)` 55 /// loc of the `*` in `int *(a)(int)` 56 /// loc of the first `*` in `const int *const *volatile a;` 57 /// 58 /// It is non-trivial to get the start location because TypeLocs are stored 59 /// inside out. In the example above `*volatile` is the TypeLoc returned 60 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 61 /// returns. 62 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 63 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 64 auto L = Visit(T.getInnerLoc()); 65 if (L.isValid()) 66 return L; 67 return T.getLParenLoc(); 68 } 69 70 // Types spelled in the prefix part of the declarator. 71 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 72 return HandlePointer(T); 73 } 74 75 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 76 return HandlePointer(T); 77 } 78 79 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 80 return HandlePointer(T); 81 } 82 83 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 84 return HandlePointer(T); 85 } 86 87 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 88 return HandlePointer(T); 89 } 90 91 // All other cases are not important, as they are either part of declaration 92 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 93 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 94 // declarator themselves, but their underlying type can. 95 SourceLocation VisitTypeLoc(TypeLoc T) { 96 auto N = T.getNextTypeLoc(); 97 if (!N) 98 return SourceLocation(); 99 return Visit(N); 100 } 101 102 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 103 if (T.getTypePtr()->hasTrailingReturn()) 104 return SourceLocation(); // avoid recursing into the suffix of declarator. 105 return VisitTypeLoc(T); 106 } 107 108 private: 109 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 110 auto L = Visit(T.getPointeeLoc()); 111 if (L.isValid()) 112 return L; 113 return T.getLocalSourceRange().getBegin(); 114 } 115 }; 116 } // namespace 117 118 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 119 switch (E.getOperator()) { 120 // Comparison 121 case OO_EqualEqual: 122 case OO_ExclaimEqual: 123 case OO_Greater: 124 case OO_GreaterEqual: 125 case OO_Less: 126 case OO_LessEqual: 127 case OO_Spaceship: 128 // Assignment 129 case OO_Equal: 130 case OO_SlashEqual: 131 case OO_PercentEqual: 132 case OO_CaretEqual: 133 case OO_PipeEqual: 134 case OO_LessLessEqual: 135 case OO_GreaterGreaterEqual: 136 case OO_PlusEqual: 137 case OO_MinusEqual: 138 case OO_StarEqual: 139 case OO_AmpEqual: 140 // Binary computation 141 case OO_Slash: 142 case OO_Percent: 143 case OO_Caret: 144 case OO_Pipe: 145 case OO_LessLess: 146 case OO_GreaterGreater: 147 case OO_AmpAmp: 148 case OO_PipePipe: 149 case OO_ArrowStar: 150 case OO_Comma: 151 return syntax::NodeKind::BinaryOperatorExpression; 152 case OO_Tilde: 153 case OO_Exclaim: 154 return syntax::NodeKind::PrefixUnaryOperatorExpression; 155 // Prefix/Postfix increment/decrement 156 case OO_PlusPlus: 157 case OO_MinusMinus: 158 switch (E.getNumArgs()) { 159 case 1: 160 return syntax::NodeKind::PrefixUnaryOperatorExpression; 161 case 2: 162 return syntax::NodeKind::PostfixUnaryOperatorExpression; 163 default: 164 llvm_unreachable("Invalid number of arguments for operator"); 165 } 166 // Operators that can be unary or binary 167 case OO_Plus: 168 case OO_Minus: 169 case OO_Star: 170 case OO_Amp: 171 switch (E.getNumArgs()) { 172 case 1: 173 return syntax::NodeKind::PrefixUnaryOperatorExpression; 174 case 2: 175 return syntax::NodeKind::BinaryOperatorExpression; 176 default: 177 llvm_unreachable("Invalid number of arguments for operator"); 178 } 179 return syntax::NodeKind::BinaryOperatorExpression; 180 // Not yet supported by SyntaxTree 181 case OO_New: 182 case OO_Delete: 183 case OO_Array_New: 184 case OO_Array_Delete: 185 case OO_Coawait: 186 case OO_Call: 187 case OO_Subscript: 188 case OO_Arrow: 189 return syntax::NodeKind::UnknownExpression; 190 case OO_Conditional: // not overloadable 191 case NUM_OVERLOADED_OPERATORS: 192 case OO_None: 193 llvm_unreachable("Not an overloadable operator"); 194 } 195 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 196 } 197 198 /// Gets the range of declarator as defined by the C++ grammar. E.g. 199 /// `int a;` -> range of `a`, 200 /// `int *a;` -> range of `*a`, 201 /// `int a[10];` -> range of `a[10]`, 202 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 203 /// `int *a = nullptr` -> range of `*a = nullptr`. 204 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 205 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 206 SourceLocation Name, 207 SourceRange Initializer) { 208 SourceLocation Start = GetStartLoc().Visit(T); 209 SourceLocation End = T.getSourceRange().getEnd(); 210 assert(End.isValid()); 211 if (Name.isValid()) { 212 if (Start.isInvalid()) 213 Start = Name; 214 if (SM.isBeforeInTranslationUnit(End, Name)) 215 End = Name; 216 } 217 if (Initializer.isValid()) { 218 auto InitializerEnd = Initializer.getEnd(); 219 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd); 220 End = InitializerEnd; 221 } 222 return SourceRange(Start, End); 223 } 224 225 namespace { 226 /// All AST hierarchy roots that can be represented as pointers. 227 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 228 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 229 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 230 /// FIXME: expose this as public API. 231 class ASTToSyntaxMapping { 232 public: 233 void add(ASTPtr From, syntax::Tree *To) { 234 assert(To != nullptr); 235 assert(!From.isNull()); 236 237 bool Added = Nodes.insert({From, To}).second; 238 (void)Added; 239 assert(Added && "mapping added twice"); 240 } 241 242 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 243 244 private: 245 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 246 }; 247 } // namespace 248 249 /// A helper class for constructing the syntax tree while traversing a clang 250 /// AST. 251 /// 252 /// At each point of the traversal we maintain a list of pending nodes. 253 /// Initially all tokens are added as pending nodes. When processing a clang AST 254 /// node, the clients need to: 255 /// - create a corresponding syntax node, 256 /// - assign roles to all pending child nodes with 'markChild' and 257 /// 'markChildToken', 258 /// - replace the child nodes with the new syntax node in the pending list 259 /// with 'foldNode'. 260 /// 261 /// Note that all children are expected to be processed when building a node. 262 /// 263 /// Call finalize() to finish building the tree and consume the root node. 264 class syntax::TreeBuilder { 265 public: 266 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 267 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 268 LocationToToken.insert({T.location().getRawEncoding(), &T}); 269 } 270 271 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 272 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 273 274 /// Populate children for \p New node, assuming it covers tokens from \p 275 /// Range. 276 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 277 ASTPtr From) { 278 assert(New); 279 Pending.foldChildren(Arena, Range, New); 280 if (From) 281 Mapping.add(From, New); 282 } 283 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 284 TypeLoc L) { 285 // FIXME: add mapping for TypeLocs 286 foldNode(Range, New, nullptr); 287 } 288 289 /// Notifies that we should not consume trailing semicolon when computing 290 /// token range of \p D. 291 void noticeDeclWithoutSemicolon(Decl *D); 292 293 /// Mark the \p Child node with a corresponding \p Role. All marked children 294 /// should be consumed by foldNode. 295 /// When called on expressions (clang::Expr is derived from clang::Stmt), 296 /// wraps expressions into expression statement. 297 void markStmtChild(Stmt *Child, NodeRole Role); 298 /// Should be called for expressions in non-statement position to avoid 299 /// wrapping into expression statement. 300 void markExprChild(Expr *Child, NodeRole Role); 301 /// Set role for a token starting at \p Loc. 302 void markChildToken(SourceLocation Loc, NodeRole R); 303 /// Set role for \p T. 304 void markChildToken(const syntax::Token *T, NodeRole R); 305 306 /// Set role for \p N. 307 void markChild(syntax::Node *N, NodeRole R); 308 /// Set role for the syntax node matching \p N. 309 void markChild(ASTPtr N, NodeRole R); 310 311 /// Finish building the tree and consume the root node. 312 syntax::TranslationUnit *finalize() && { 313 auto Tokens = Arena.tokenBuffer().expandedTokens(); 314 assert(!Tokens.empty()); 315 assert(Tokens.back().kind() == tok::eof); 316 317 // Build the root of the tree, consuming all the children. 318 Pending.foldChildren(Arena, Tokens.drop_back(), 319 new (Arena.allocator()) syntax::TranslationUnit); 320 321 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 322 TU->assertInvariantsRecursive(); 323 return TU; 324 } 325 326 /// Finds a token starting at \p L. The token must exist if \p L is valid. 327 const syntax::Token *findToken(SourceLocation L) const; 328 329 /// Finds the syntax tokens corresponding to the \p SourceRange. 330 llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { 331 assert(Range.isValid()); 332 return getRange(Range.getBegin(), Range.getEnd()); 333 } 334 335 /// Finds the syntax tokens corresponding to the passed source locations. 336 /// \p First is the start position of the first token and \p Last is the start 337 /// position of the last token. 338 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, 339 SourceLocation Last) const { 340 assert(First.isValid()); 341 assert(Last.isValid()); 342 assert(First == Last || 343 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 344 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 345 } 346 347 llvm::ArrayRef<syntax::Token> 348 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 349 auto Tokens = getRange(D->getSourceRange()); 350 return maybeAppendSemicolon(Tokens, D); 351 } 352 353 /// Returns true if \p D is the last declarator in a chain and is thus 354 /// reponsible for creating SimpleDeclaration for the whole chain. 355 template <class T> 356 bool isResponsibleForCreatingDeclaration(const T *D) const { 357 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 358 std::is_base_of<TypedefNameDecl, T>::value), 359 "only DeclaratorDecl and TypedefNameDecl are supported."); 360 361 const Decl *Next = D->getNextDeclInContext(); 362 363 // There's no next sibling, this one is responsible. 364 if (Next == nullptr) { 365 return true; 366 } 367 const auto *NextT = llvm::dyn_cast<T>(Next); 368 369 // Next sibling is not the same type, this one is responsible. 370 if (NextT == nullptr) { 371 return true; 372 } 373 // Next sibling doesn't begin at the same loc, it must be a different 374 // declaration, so this declarator is responsible. 375 if (NextT->getBeginLoc() != D->getBeginLoc()) { 376 return true; 377 } 378 379 // NextT is a member of the same declaration, and we need the last member to 380 // create declaration. This one is not responsible. 381 return false; 382 } 383 384 llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 385 llvm::ArrayRef<clang::syntax::Token> Tokens; 386 // We want to drop the template parameters for specializations. 387 if (const auto *S = llvm::dyn_cast<TagDecl>(D)) 388 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 389 else 390 Tokens = getRange(D->getSourceRange()); 391 return maybeAppendSemicolon(Tokens, D); 392 } 393 394 llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 395 return getRange(E->getSourceRange()); 396 } 397 398 /// Find the adjusted range for the statement, consuming the trailing 399 /// semicolon when needed. 400 llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 401 auto Tokens = getRange(S->getSourceRange()); 402 if (isa<CompoundStmt>(S)) 403 return Tokens; 404 405 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 406 // all statements that end with those. Consume this semicolon here. 407 if (Tokens.back().kind() == tok::semi) 408 return Tokens; 409 return withTrailingSemicolon(Tokens); 410 } 411 412 private: 413 llvm::ArrayRef<syntax::Token> 414 maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, 415 const Decl *D) const { 416 if (llvm::isa<NamespaceDecl>(D)) 417 return Tokens; 418 if (DeclsWithoutSemicolons.count(D)) 419 return Tokens; 420 // FIXME: do not consume trailing semicolon on function definitions. 421 // Most declarations own a semicolon in syntax trees, but not in clang AST. 422 return withTrailingSemicolon(Tokens); 423 } 424 425 llvm::ArrayRef<syntax::Token> 426 withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { 427 assert(!Tokens.empty()); 428 assert(Tokens.back().kind() != tok::eof); 429 // We never consume 'eof', so looking at the next token is ok. 430 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 431 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 432 return Tokens; 433 } 434 435 void setRole(syntax::Node *N, NodeRole R) { 436 assert(N->role() == NodeRole::Detached); 437 N->setRole(R); 438 } 439 440 /// A collection of trees covering the input tokens. 441 /// When created, each tree corresponds to a single token in the file. 442 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 443 /// node and update the list of trees accordingly. 444 /// 445 /// Ensures that added nodes properly nest and cover the whole token stream. 446 struct Forest { 447 Forest(syntax::Arena &A) { 448 assert(!A.tokenBuffer().expandedTokens().empty()); 449 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 450 // Create all leaf nodes. 451 // Note that we do not have 'eof' in the tree. 452 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 453 auto *L = new (A.allocator()) syntax::Leaf(&T); 454 L->Original = true; 455 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 456 Trees.insert(Trees.end(), {&T, L}); 457 } 458 } 459 460 void assignRole(llvm::ArrayRef<syntax::Token> Range, 461 syntax::NodeRole Role) { 462 assert(!Range.empty()); 463 auto It = Trees.lower_bound(Range.begin()); 464 assert(It != Trees.end() && "no node found"); 465 assert(It->first == Range.begin() && "no child with the specified range"); 466 assert((std::next(It) == Trees.end() || 467 std::next(It)->first == Range.end()) && 468 "no child with the specified range"); 469 assert(It->second->role() == NodeRole::Detached && 470 "re-assigning role for a child"); 471 It->second->setRole(Role); 472 } 473 474 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 475 void foldChildren(const syntax::Arena &A, 476 llvm::ArrayRef<syntax::Token> Tokens, 477 syntax::Tree *Node) { 478 // Attach children to `Node`. 479 assert(Node->firstChild() == nullptr && "node already has children"); 480 481 auto *FirstToken = Tokens.begin(); 482 auto BeginChildren = Trees.lower_bound(FirstToken); 483 484 assert((BeginChildren == Trees.end() || 485 BeginChildren->first == FirstToken) && 486 "fold crosses boundaries of existing subtrees"); 487 auto EndChildren = Trees.lower_bound(Tokens.end()); 488 assert( 489 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 490 "fold crosses boundaries of existing subtrees"); 491 492 // We need to go in reverse order, because we can only prepend. 493 for (auto It = EndChildren; It != BeginChildren; --It) { 494 auto *C = std::prev(It)->second; 495 if (C->role() == NodeRole::Detached) 496 C->setRole(NodeRole::Unknown); 497 Node->prependChildLowLevel(C); 498 } 499 500 // Mark that this node came from the AST and is backed by the source code. 501 Node->Original = true; 502 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 503 504 Trees.erase(BeginChildren, EndChildren); 505 Trees.insert({FirstToken, Node}); 506 } 507 508 // EXPECTS: all tokens were consumed and are owned by a single root node. 509 syntax::Node *finalize() && { 510 assert(Trees.size() == 1); 511 auto *Root = Trees.begin()->second; 512 Trees = {}; 513 return Root; 514 } 515 516 std::string str(const syntax::Arena &A) const { 517 std::string R; 518 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 519 unsigned CoveredTokens = 520 It != Trees.end() 521 ? (std::next(It)->first - It->first) 522 : A.tokenBuffer().expandedTokens().end() - It->first; 523 524 R += std::string(llvm::formatv( 525 "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 526 It->first->text(A.sourceManager()), CoveredTokens)); 527 R += It->second->dump(A); 528 } 529 return R; 530 } 531 532 private: 533 /// Maps from the start token to a subtree starting at that token. 534 /// Keys in the map are pointers into the array of expanded tokens, so 535 /// pointer order corresponds to the order of preprocessor tokens. 536 std::map<const syntax::Token *, syntax::Node *> Trees; 537 }; 538 539 /// For debugging purposes. 540 std::string str() { return Pending.str(Arena); } 541 542 syntax::Arena &Arena; 543 /// To quickly find tokens by their start location. 544 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 545 LocationToToken; 546 Forest Pending; 547 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 548 ASTToSyntaxMapping Mapping; 549 }; 550 551 namespace { 552 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 553 public: 554 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) 555 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} 556 557 bool shouldTraversePostOrder() const { return true; } 558 559 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 560 return processDeclaratorAndDeclaration(DD); 561 } 562 563 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 564 return processDeclaratorAndDeclaration(TD); 565 } 566 567 bool VisitDecl(Decl *D) { 568 assert(!D->isImplicit()); 569 Builder.foldNode(Builder.getDeclarationRange(D), 570 new (allocator()) syntax::UnknownDeclaration(), D); 571 return true; 572 } 573 574 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 575 // override Traverse. 576 // FIXME: make RAV call WalkUpFrom* instead. 577 bool 578 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 579 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 580 return false; 581 if (C->isExplicitSpecialization()) 582 return true; // we are only interested in explicit instantiations. 583 auto *Declaration = 584 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 585 foldExplicitTemplateInstantiation( 586 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 587 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 588 return true; 589 } 590 591 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 592 foldTemplateDeclaration( 593 Builder.getDeclarationRange(S), 594 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 595 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 596 return true; 597 } 598 599 bool WalkUpFromTagDecl(TagDecl *C) { 600 // FIXME: build the ClassSpecifier node. 601 if (!C->isFreeStanding()) { 602 assert(C->getNumTemplateParameterLists() == 0); 603 return true; 604 } 605 handleFreeStandingTagDecl(C); 606 return true; 607 } 608 609 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 610 assert(C->isFreeStanding()); 611 // Class is a declaration specifier and needs a spanning declaration node. 612 auto DeclarationRange = Builder.getDeclarationRange(C); 613 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 614 Builder.foldNode(DeclarationRange, Result, nullptr); 615 616 // Build TemplateDeclaration nodes if we had template parameters. 617 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 618 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 619 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 620 Result = 621 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 622 DeclarationRange = R; 623 }; 624 if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 625 ConsumeTemplateParameters(*S->getTemplateParameters()); 626 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 627 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 628 return Result; 629 } 630 631 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 632 // We do not want to call VisitDecl(), the declaration for translation 633 // unit is built by finalize(). 634 return true; 635 } 636 637 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 638 using NodeRole = syntax::NodeRole; 639 640 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 641 for (auto *Child : S->body()) 642 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 643 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 644 645 Builder.foldNode(Builder.getStmtRange(S), 646 new (allocator()) syntax::CompoundStatement, S); 647 return true; 648 } 649 650 // Some statements are not yet handled by syntax trees. 651 bool WalkUpFromStmt(Stmt *S) { 652 Builder.foldNode(Builder.getStmtRange(S), 653 new (allocator()) syntax::UnknownStatement, S); 654 return true; 655 } 656 657 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 658 // We override to traverse range initializer as VarDecl. 659 // RAV traverses it as a statement, we produce invalid node kinds in that 660 // case. 661 // FIXME: should do this in RAV instead? 662 bool Result = [&, this]() { 663 if (S->getInit() && !TraverseStmt(S->getInit())) 664 return false; 665 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 666 return false; 667 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 668 return false; 669 if (S->getBody() && !TraverseStmt(S->getBody())) 670 return false; 671 return true; 672 }(); 673 WalkUpFromCXXForRangeStmt(S); 674 return Result; 675 } 676 677 bool TraverseStmt(Stmt *S) { 678 if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { 679 // We want to consume the semicolon, make sure SimpleDeclaration does not. 680 for (auto *D : DS->decls()) 681 Builder.noticeDeclWithoutSemicolon(D); 682 } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { 683 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 684 } 685 return RecursiveASTVisitor::TraverseStmt(S); 686 } 687 688 // Some expressions are not yet handled by syntax trees. 689 bool WalkUpFromExpr(Expr *E) { 690 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 691 Builder.foldNode(Builder.getExprRange(E), 692 new (allocator()) syntax::UnknownExpression, E); 693 return true; 694 } 695 696 syntax::NestedNameSpecifier * 697 BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) { 698 if (!QualifierLoc) 699 return nullptr; 700 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 701 auto *NS = new (allocator()) syntax::NameSpecifier; 702 Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr); 703 Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); 704 } 705 auto *NNS = new (allocator()) syntax::NestedNameSpecifier; 706 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, 707 nullptr); 708 return NNS; 709 } 710 711 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 712 if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) 713 Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); 714 715 auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; 716 // Get `UnqualifiedId` from `DeclRefExpr`. 717 // FIXME: Extract this logic so that it can be used by `MemberExpr`, 718 // and other semantic constructs, now it is tied to `DeclRefExpr`. 719 if (!S->hasExplicitTemplateArgs()) { 720 Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()), 721 unqualifiedId, nullptr); 722 } else { 723 auto templateIdSourceRange = 724 SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc()); 725 Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId, 726 nullptr); 727 } 728 Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); 729 730 Builder.foldNode(Builder.getExprRange(S), 731 new (allocator()) syntax::IdExpression, S); 732 return true; 733 } 734 735 bool WalkUpFromParenExpr(ParenExpr *S) { 736 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 737 Builder.markExprChild(S->getSubExpr(), 738 syntax::NodeRole::ParenExpression_subExpression); 739 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 740 Builder.foldNode(Builder.getExprRange(S), 741 new (allocator()) syntax::ParenExpression, S); 742 return true; 743 } 744 745 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 746 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 747 Builder.foldNode(Builder.getExprRange(S), 748 new (allocator()) syntax::IntegerLiteralExpression, S); 749 return true; 750 } 751 752 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 753 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 754 Builder.foldNode(Builder.getExprRange(S), 755 new (allocator()) syntax::CharacterLiteralExpression, S); 756 return true; 757 } 758 759 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 760 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 761 Builder.foldNode(Builder.getExprRange(S), 762 new (allocator()) syntax::FloatingLiteralExpression, S); 763 return true; 764 } 765 766 bool WalkUpFromStringLiteral(StringLiteral *S) { 767 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 768 Builder.foldNode(Builder.getExprRange(S), 769 new (allocator()) syntax::StringLiteralExpression, S); 770 return true; 771 } 772 773 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 774 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 775 Builder.foldNode(Builder.getExprRange(S), 776 new (allocator()) syntax::BoolLiteralExpression, S); 777 return true; 778 } 779 780 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 781 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 782 Builder.foldNode(Builder.getExprRange(S), 783 new (allocator()) syntax::CxxNullPtrExpression, S); 784 return true; 785 } 786 787 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 788 Builder.markChildToken(S->getOperatorLoc(), 789 syntax::NodeRole::OperatorExpression_operatorToken); 790 Builder.markExprChild(S->getSubExpr(), 791 syntax::NodeRole::UnaryOperatorExpression_operand); 792 793 if (S->isPostfix()) 794 Builder.foldNode(Builder.getExprRange(S), 795 new (allocator()) syntax::PostfixUnaryOperatorExpression, 796 S); 797 else 798 Builder.foldNode(Builder.getExprRange(S), 799 new (allocator()) syntax::PrefixUnaryOperatorExpression, 800 S); 801 802 return true; 803 } 804 805 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 806 Builder.markExprChild( 807 S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 808 Builder.markChildToken(S->getOperatorLoc(), 809 syntax::NodeRole::OperatorExpression_operatorToken); 810 Builder.markExprChild( 811 S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 812 Builder.foldNode(Builder.getExprRange(S), 813 new (allocator()) syntax::BinaryOperatorExpression, S); 814 return true; 815 } 816 817 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 818 if (getOperatorNodeKind(*S) == 819 syntax::NodeKind::PostfixUnaryOperatorExpression) { 820 // A postfix unary operator is declared as taking two operands. The second 821 // operand is used to distinguish from its prefix counterpart. In the 822 // semantic AST this "phantom" operand is represented as a 823 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 824 // operand because it does not correspond to anything written in source 825 // code 826 for (auto *child : S->children()) { 827 if (child->getSourceRange().isInvalid()) 828 continue; 829 if (!TraverseStmt(child)) 830 return false; 831 } 832 return WalkUpFromCXXOperatorCallExpr(S); 833 } else 834 return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); 835 } 836 837 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 838 switch (getOperatorNodeKind(*S)) { 839 case syntax::NodeKind::BinaryOperatorExpression: 840 Builder.markExprChild( 841 S->getArg(0), 842 syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 843 Builder.markChildToken( 844 S->getOperatorLoc(), 845 syntax::NodeRole::OperatorExpression_operatorToken); 846 Builder.markExprChild( 847 S->getArg(1), 848 syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 849 Builder.foldNode(Builder.getExprRange(S), 850 new (allocator()) syntax::BinaryOperatorExpression, S); 851 return true; 852 case syntax::NodeKind::PrefixUnaryOperatorExpression: 853 Builder.markChildToken( 854 S->getOperatorLoc(), 855 syntax::NodeRole::OperatorExpression_operatorToken); 856 Builder.markExprChild(S->getArg(0), 857 syntax::NodeRole::UnaryOperatorExpression_operand); 858 Builder.foldNode(Builder.getExprRange(S), 859 new (allocator()) syntax::PrefixUnaryOperatorExpression, 860 S); 861 return true; 862 case syntax::NodeKind::PostfixUnaryOperatorExpression: 863 Builder.markChildToken( 864 S->getOperatorLoc(), 865 syntax::NodeRole::OperatorExpression_operatorToken); 866 Builder.markExprChild(S->getArg(0), 867 syntax::NodeRole::UnaryOperatorExpression_operand); 868 Builder.foldNode(Builder.getExprRange(S), 869 new (allocator()) syntax::PostfixUnaryOperatorExpression, 870 S); 871 return true; 872 case syntax::NodeKind::UnknownExpression: 873 return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); 874 default: 875 llvm_unreachable("getOperatorNodeKind() does not return this value"); 876 } 877 } 878 879 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 880 auto Tokens = Builder.getDeclarationRange(S); 881 if (Tokens.front().kind() == tok::coloncolon) { 882 // Handle nested namespace definitions. Those start at '::' token, e.g. 883 // namespace a^::b {} 884 // FIXME: build corresponding nodes for the name of this namespace. 885 return true; 886 } 887 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 888 return true; 889 } 890 891 bool TraverseParenTypeLoc(ParenTypeLoc L) { 892 // We reverse order of traversal to get the proper syntax structure. 893 if (!WalkUpFromParenTypeLoc(L)) 894 return false; 895 return TraverseTypeLoc(L.getInnerLoc()); 896 } 897 898 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 899 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 900 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 901 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 902 new (allocator()) syntax::ParenDeclarator, L); 903 return true; 904 } 905 906 // Declarator chunks, they are produced by type locs and some clang::Decls. 907 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 908 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 909 Builder.markExprChild(L.getSizeExpr(), 910 syntax::NodeRole::ArraySubscript_sizeExpression); 911 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 912 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 913 new (allocator()) syntax::ArraySubscript, L); 914 return true; 915 } 916 917 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 918 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 919 for (auto *P : L.getParams()) { 920 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 921 } 922 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 923 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 924 new (allocator()) syntax::ParametersAndQualifiers, L); 925 return true; 926 } 927 928 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 929 if (!L.getTypePtr()->hasTrailingReturn()) 930 return WalkUpFromFunctionTypeLoc(L); 931 932 auto *TrailingReturnTokens = BuildTrailingReturn(L); 933 // Finish building the node for parameters. 934 Builder.markChild(TrailingReturnTokens, 935 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 936 return WalkUpFromFunctionTypeLoc(L); 937 } 938 939 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 940 auto SR = L.getLocalSourceRange(); 941 Builder.foldNode(Builder.getRange(SR), 942 new (allocator()) syntax::MemberPointer, L); 943 return true; 944 } 945 946 // The code below is very regular, it could even be generated with some 947 // preprocessor magic. We merely assign roles to the corresponding children 948 // and fold resulting nodes. 949 bool WalkUpFromDeclStmt(DeclStmt *S) { 950 Builder.foldNode(Builder.getStmtRange(S), 951 new (allocator()) syntax::DeclarationStatement, S); 952 return true; 953 } 954 955 bool WalkUpFromNullStmt(NullStmt *S) { 956 Builder.foldNode(Builder.getStmtRange(S), 957 new (allocator()) syntax::EmptyStatement, S); 958 return true; 959 } 960 961 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 962 Builder.markChildToken(S->getSwitchLoc(), 963 syntax::NodeRole::IntroducerKeyword); 964 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 965 Builder.foldNode(Builder.getStmtRange(S), 966 new (allocator()) syntax::SwitchStatement, S); 967 return true; 968 } 969 970 bool WalkUpFromCaseStmt(CaseStmt *S) { 971 Builder.markChildToken(S->getKeywordLoc(), 972 syntax::NodeRole::IntroducerKeyword); 973 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 974 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 975 Builder.foldNode(Builder.getStmtRange(S), 976 new (allocator()) syntax::CaseStatement, S); 977 return true; 978 } 979 980 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 981 Builder.markChildToken(S->getKeywordLoc(), 982 syntax::NodeRole::IntroducerKeyword); 983 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 984 Builder.foldNode(Builder.getStmtRange(S), 985 new (allocator()) syntax::DefaultStatement, S); 986 return true; 987 } 988 989 bool WalkUpFromIfStmt(IfStmt *S) { 990 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 991 Builder.markStmtChild(S->getThen(), 992 syntax::NodeRole::IfStatement_thenStatement); 993 Builder.markChildToken(S->getElseLoc(), 994 syntax::NodeRole::IfStatement_elseKeyword); 995 Builder.markStmtChild(S->getElse(), 996 syntax::NodeRole::IfStatement_elseStatement); 997 Builder.foldNode(Builder.getStmtRange(S), 998 new (allocator()) syntax::IfStatement, S); 999 return true; 1000 } 1001 1002 bool WalkUpFromForStmt(ForStmt *S) { 1003 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1004 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1005 Builder.foldNode(Builder.getStmtRange(S), 1006 new (allocator()) syntax::ForStatement, S); 1007 return true; 1008 } 1009 1010 bool WalkUpFromWhileStmt(WhileStmt *S) { 1011 Builder.markChildToken(S->getWhileLoc(), 1012 syntax::NodeRole::IntroducerKeyword); 1013 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1014 Builder.foldNode(Builder.getStmtRange(S), 1015 new (allocator()) syntax::WhileStatement, S); 1016 return true; 1017 } 1018 1019 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1020 Builder.markChildToken(S->getContinueLoc(), 1021 syntax::NodeRole::IntroducerKeyword); 1022 Builder.foldNode(Builder.getStmtRange(S), 1023 new (allocator()) syntax::ContinueStatement, S); 1024 return true; 1025 } 1026 1027 bool WalkUpFromBreakStmt(BreakStmt *S) { 1028 Builder.markChildToken(S->getBreakLoc(), 1029 syntax::NodeRole::IntroducerKeyword); 1030 Builder.foldNode(Builder.getStmtRange(S), 1031 new (allocator()) syntax::BreakStatement, S); 1032 return true; 1033 } 1034 1035 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1036 Builder.markChildToken(S->getReturnLoc(), 1037 syntax::NodeRole::IntroducerKeyword); 1038 Builder.markExprChild(S->getRetValue(), 1039 syntax::NodeRole::ReturnStatement_value); 1040 Builder.foldNode(Builder.getStmtRange(S), 1041 new (allocator()) syntax::ReturnStatement, S); 1042 return true; 1043 } 1044 1045 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1046 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1047 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1048 Builder.foldNode(Builder.getStmtRange(S), 1049 new (allocator()) syntax::RangeBasedForStatement, S); 1050 return true; 1051 } 1052 1053 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1054 Builder.foldNode(Builder.getDeclarationRange(S), 1055 new (allocator()) syntax::EmptyDeclaration, S); 1056 return true; 1057 } 1058 1059 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1060 Builder.markExprChild(S->getAssertExpr(), 1061 syntax::NodeRole::StaticAssertDeclaration_condition); 1062 Builder.markExprChild(S->getMessage(), 1063 syntax::NodeRole::StaticAssertDeclaration_message); 1064 Builder.foldNode(Builder.getDeclarationRange(S), 1065 new (allocator()) syntax::StaticAssertDeclaration, S); 1066 return true; 1067 } 1068 1069 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1070 Builder.foldNode(Builder.getDeclarationRange(S), 1071 new (allocator()) syntax::LinkageSpecificationDeclaration, 1072 S); 1073 return true; 1074 } 1075 1076 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1077 Builder.foldNode(Builder.getDeclarationRange(S), 1078 new (allocator()) syntax::NamespaceAliasDefinition, S); 1079 return true; 1080 } 1081 1082 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1083 Builder.foldNode(Builder.getDeclarationRange(S), 1084 new (allocator()) syntax::UsingNamespaceDirective, S); 1085 return true; 1086 } 1087 1088 bool WalkUpFromUsingDecl(UsingDecl *S) { 1089 Builder.foldNode(Builder.getDeclarationRange(S), 1090 new (allocator()) syntax::UsingDeclaration, S); 1091 return true; 1092 } 1093 1094 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1095 Builder.foldNode(Builder.getDeclarationRange(S), 1096 new (allocator()) syntax::UsingDeclaration, S); 1097 return true; 1098 } 1099 1100 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1101 Builder.foldNode(Builder.getDeclarationRange(S), 1102 new (allocator()) syntax::UsingDeclaration, S); 1103 return true; 1104 } 1105 1106 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1107 Builder.foldNode(Builder.getDeclarationRange(S), 1108 new (allocator()) syntax::TypeAliasDeclaration, S); 1109 return true; 1110 } 1111 1112 private: 1113 template <class T> SourceLocation getQualifiedNameStart(T *D) { 1114 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 1115 std::is_base_of<TypedefNameDecl, T>::value), 1116 "only DeclaratorDecl and TypedefNameDecl are supported."); 1117 1118 auto DN = D->getDeclName(); 1119 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 1120 if (IsAnonymous) 1121 return SourceLocation(); 1122 1123 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { 1124 if (DD->getQualifierLoc()) { 1125 return DD->getQualifierLoc().getBeginLoc(); 1126 } 1127 } 1128 1129 return D->getLocation(); 1130 } 1131 1132 SourceRange getInitializerRange(Decl *D) { 1133 if (auto *V = llvm::dyn_cast<VarDecl>(D)) { 1134 auto *I = V->getInit(); 1135 // Initializers in range-based-for are not part of the declarator 1136 if (I && !V->isCXXForRangeDecl()) 1137 return I->getSourceRange(); 1138 } 1139 1140 return SourceRange(); 1141 } 1142 1143 /// Folds SimpleDeclarator node (if present) and in case this is the last 1144 /// declarator in the chain it also folds SimpleDeclaration node. 1145 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1146 SourceRange Initializer = getInitializerRange(D); 1147 auto Range = getDeclaratorRange(Builder.sourceManager(), 1148 D->getTypeSourceInfo()->getTypeLoc(), 1149 getQualifiedNameStart(D), Initializer); 1150 1151 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1152 // declaration, but no declarator). 1153 if (Range.getBegin().isValid()) { 1154 auto *N = new (allocator()) syntax::SimpleDeclarator; 1155 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1156 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 1157 } 1158 1159 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1160 Builder.foldNode(Builder.getDeclarationRange(D), 1161 new (allocator()) syntax::SimpleDeclaration, D); 1162 } 1163 return true; 1164 } 1165 1166 /// Returns the range of the built node. 1167 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 1168 assert(L.getTypePtr()->hasTrailingReturn()); 1169 1170 auto ReturnedType = L.getReturnLoc(); 1171 // Build node for the declarator, if any. 1172 auto ReturnDeclaratorRange = 1173 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 1174 /*Name=*/SourceLocation(), 1175 /*Initializer=*/SourceLocation()); 1176 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1177 if (ReturnDeclaratorRange.isValid()) { 1178 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1179 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1180 ReturnDeclarator, nullptr); 1181 } 1182 1183 // Build node for trailing return type. 1184 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1185 const auto *Arrow = Return.begin() - 1; 1186 assert(Arrow->kind() == tok::arrow); 1187 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1188 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1189 if (ReturnDeclarator) 1190 Builder.markChild(ReturnDeclarator, 1191 syntax::NodeRole::TrailingReturnType_declarator); 1192 auto *R = new (allocator()) syntax::TrailingReturnType; 1193 Builder.foldNode(Tokens, R, L); 1194 return R; 1195 } 1196 1197 void foldExplicitTemplateInstantiation( 1198 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1199 const syntax::Token *TemplateKW, 1200 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1201 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1202 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1203 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1204 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1205 Builder.markChild( 1206 InnerDeclaration, 1207 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 1208 Builder.foldNode( 1209 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1210 } 1211 1212 syntax::TemplateDeclaration *foldTemplateDeclaration( 1213 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1214 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1215 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1216 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1217 1218 auto *N = new (allocator()) syntax::TemplateDeclaration; 1219 Builder.foldNode(Range, N, From); 1220 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 1221 return N; 1222 } 1223 1224 /// A small helper to save some typing. 1225 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1226 1227 syntax::TreeBuilder &Builder; 1228 const LangOptions &LangOpts; 1229 }; 1230 } // namespace 1231 1232 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1233 DeclsWithoutSemicolons.insert(D); 1234 } 1235 1236 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1237 if (Loc.isInvalid()) 1238 return; 1239 Pending.assignRole(*findToken(Loc), Role); 1240 } 1241 1242 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1243 if (!T) 1244 return; 1245 Pending.assignRole(*T, R); 1246 } 1247 1248 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1249 assert(N); 1250 setRole(N, R); 1251 } 1252 1253 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1254 auto *SN = Mapping.find(N); 1255 assert(SN != nullptr); 1256 setRole(SN, R); 1257 } 1258 1259 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1260 if (!Child) 1261 return; 1262 1263 syntax::Tree *ChildNode; 1264 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1265 // This is an expression in a statement position, consume the trailing 1266 // semicolon and form an 'ExpressionStatement' node. 1267 markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); 1268 ChildNode = new (allocator()) syntax::ExpressionStatement; 1269 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1270 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1271 } else { 1272 ChildNode = Mapping.find(Child); 1273 } 1274 assert(ChildNode != nullptr); 1275 setRole(ChildNode, Role); 1276 } 1277 1278 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1279 if (!Child) 1280 return; 1281 Child = Child->IgnoreImplicit(); 1282 1283 syntax::Tree *ChildNode = Mapping.find(Child); 1284 assert(ChildNode != nullptr); 1285 setRole(ChildNode, Role); 1286 } 1287 1288 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1289 if (L.isInvalid()) 1290 return nullptr; 1291 auto It = LocationToToken.find(L.getRawEncoding()); 1292 assert(It != LocationToToken.end()); 1293 return It->second; 1294 } 1295 1296 syntax::TranslationUnit * 1297 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1298 TreeBuilder Builder(A); 1299 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1300 return std::move(Builder).finalize(); 1301 } 1302