1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Call: 188 case OO_Subscript: 189 case OO_Arrow: 190 return syntax::NodeKind::UnknownExpression; 191 case OO_Conditional: // not overloadable 192 case NUM_OVERLOADED_OPERATORS: 193 case OO_None: 194 llvm_unreachable("Not an overloadable operator"); 195 } 196 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 197 } 198 199 /// Gets the range of declarator as defined by the C++ grammar. E.g. 200 /// `int a;` -> range of `a`, 201 /// `int *a;` -> range of `*a`, 202 /// `int a[10];` -> range of `a[10]`, 203 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 204 /// `int *a = nullptr` -> range of `*a = nullptr`. 205 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 206 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 207 SourceLocation Name, 208 SourceRange Initializer) { 209 SourceLocation Start = GetStartLoc().Visit(T); 210 SourceLocation End = T.getSourceRange().getEnd(); 211 assert(End.isValid()); 212 if (Name.isValid()) { 213 if (Start.isInvalid()) 214 Start = Name; 215 if (SM.isBeforeInTranslationUnit(End, Name)) 216 End = Name; 217 } 218 if (Initializer.isValid()) { 219 auto InitializerEnd = Initializer.getEnd(); 220 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 221 End == InitializerEnd); 222 End = InitializerEnd; 223 } 224 return SourceRange(Start, End); 225 } 226 227 namespace { 228 /// All AST hierarchy roots that can be represented as pointers. 229 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 230 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 231 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 232 /// FIXME: expose this as public API. 233 class ASTToSyntaxMapping { 234 public: 235 void add(ASTPtr From, syntax::Tree *To) { 236 assert(To != nullptr); 237 assert(!From.isNull()); 238 239 bool Added = Nodes.insert({From, To}).second; 240 (void)Added; 241 assert(Added && "mapping added twice"); 242 } 243 244 void add(NestedNameSpecifierLoc From, syntax::Tree *To) { 245 assert(To != nullptr); 246 assert(From.hasQualifier()); 247 248 bool Added = NNSNodes.insert({From, To}).second; 249 (void)Added; 250 assert(Added && "mapping added twice"); 251 } 252 253 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 254 255 syntax::Tree *find(NestedNameSpecifierLoc P) const { 256 return NNSNodes.lookup(P); 257 } 258 259 private: 260 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 261 llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; 262 }; 263 } // namespace 264 265 /// A helper class for constructing the syntax tree while traversing a clang 266 /// AST. 267 /// 268 /// At each point of the traversal we maintain a list of pending nodes. 269 /// Initially all tokens are added as pending nodes. When processing a clang AST 270 /// node, the clients need to: 271 /// - create a corresponding syntax node, 272 /// - assign roles to all pending child nodes with 'markChild' and 273 /// 'markChildToken', 274 /// - replace the child nodes with the new syntax node in the pending list 275 /// with 'foldNode'. 276 /// 277 /// Note that all children are expected to be processed when building a node. 278 /// 279 /// Call finalize() to finish building the tree and consume the root node. 280 class syntax::TreeBuilder { 281 public: 282 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 283 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 284 LocationToToken.insert({T.location().getRawEncoding(), &T}); 285 } 286 287 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 288 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 289 290 /// Populate children for \p New node, assuming it covers tokens from \p 291 /// Range. 292 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { 293 assert(New); 294 Pending.foldChildren(Arena, Range, New); 295 if (From) 296 Mapping.add(From, New); 297 } 298 299 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { 300 // FIXME: add mapping for TypeLocs 301 foldNode(Range, New, nullptr); 302 } 303 304 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 305 NestedNameSpecifierLoc From) { 306 assert(New); 307 Pending.foldChildren(Arena, Range, New); 308 if (From) 309 Mapping.add(From, New); 310 } 311 312 /// Notifies that we should not consume trailing semicolon when computing 313 /// token range of \p D. 314 void noticeDeclWithoutSemicolon(Decl *D); 315 316 /// Mark the \p Child node with a corresponding \p Role. All marked children 317 /// should be consumed by foldNode. 318 /// When called on expressions (clang::Expr is derived from clang::Stmt), 319 /// wraps expressions into expression statement. 320 void markStmtChild(Stmt *Child, NodeRole Role); 321 /// Should be called for expressions in non-statement position to avoid 322 /// wrapping into expression statement. 323 void markExprChild(Expr *Child, NodeRole Role); 324 /// Set role for a token starting at \p Loc. 325 void markChildToken(SourceLocation Loc, NodeRole R); 326 /// Set role for \p T. 327 void markChildToken(const syntax::Token *T, NodeRole R); 328 329 /// Set role for \p N. 330 void markChild(syntax::Node *N, NodeRole R); 331 /// Set role for the syntax node matching \p N. 332 void markChild(ASTPtr N, NodeRole R); 333 /// Set role for the syntax node matching \p N. 334 void markChild(NestedNameSpecifierLoc N, NodeRole R); 335 336 /// Finish building the tree and consume the root node. 337 syntax::TranslationUnit *finalize() && { 338 auto Tokens = Arena.tokenBuffer().expandedTokens(); 339 assert(!Tokens.empty()); 340 assert(Tokens.back().kind() == tok::eof); 341 342 // Build the root of the tree, consuming all the children. 343 Pending.foldChildren(Arena, Tokens.drop_back(), 344 new (Arena.allocator()) syntax::TranslationUnit); 345 346 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 347 TU->assertInvariantsRecursive(); 348 return TU; 349 } 350 351 /// Finds a token starting at \p L. The token must exist if \p L is valid. 352 const syntax::Token *findToken(SourceLocation L) const; 353 354 /// Finds the syntax tokens corresponding to the \p SourceRange. 355 ArrayRef<syntax::Token> getRange(SourceRange Range) const { 356 assert(Range.isValid()); 357 return getRange(Range.getBegin(), Range.getEnd()); 358 } 359 360 /// Finds the syntax tokens corresponding to the passed source locations. 361 /// \p First is the start position of the first token and \p Last is the start 362 /// position of the last token. 363 ArrayRef<syntax::Token> getRange(SourceLocation First, 364 SourceLocation Last) const { 365 assert(First.isValid()); 366 assert(Last.isValid()); 367 assert(First == Last || 368 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 369 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 370 } 371 372 ArrayRef<syntax::Token> 373 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 374 auto Tokens = getRange(D->getSourceRange()); 375 return maybeAppendSemicolon(Tokens, D); 376 } 377 378 /// Returns true if \p D is the last declarator in a chain and is thus 379 /// reponsible for creating SimpleDeclaration for the whole chain. 380 template <class T> 381 bool isResponsibleForCreatingDeclaration(const T *D) const { 382 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 383 std::is_base_of<TypedefNameDecl, T>::value), 384 "only DeclaratorDecl and TypedefNameDecl are supported."); 385 386 const Decl *Next = D->getNextDeclInContext(); 387 388 // There's no next sibling, this one is responsible. 389 if (Next == nullptr) { 390 return true; 391 } 392 const auto *NextT = dyn_cast<T>(Next); 393 394 // Next sibling is not the same type, this one is responsible. 395 if (NextT == nullptr) { 396 return true; 397 } 398 // Next sibling doesn't begin at the same loc, it must be a different 399 // declaration, so this declarator is responsible. 400 if (NextT->getBeginLoc() != D->getBeginLoc()) { 401 return true; 402 } 403 404 // NextT is a member of the same declaration, and we need the last member to 405 // create declaration. This one is not responsible. 406 return false; 407 } 408 409 ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 410 ArrayRef<syntax::Token> Tokens; 411 // We want to drop the template parameters for specializations. 412 if (const auto *S = dyn_cast<TagDecl>(D)) 413 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 414 else 415 Tokens = getRange(D->getSourceRange()); 416 return maybeAppendSemicolon(Tokens, D); 417 } 418 419 ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 420 return getRange(E->getSourceRange()); 421 } 422 423 /// Find the adjusted range for the statement, consuming the trailing 424 /// semicolon when needed. 425 ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 426 auto Tokens = getRange(S->getSourceRange()); 427 if (isa<CompoundStmt>(S)) 428 return Tokens; 429 430 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 431 // all statements that end with those. Consume this semicolon here. 432 if (Tokens.back().kind() == tok::semi) 433 return Tokens; 434 return withTrailingSemicolon(Tokens); 435 } 436 437 private: 438 ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, 439 const Decl *D) const { 440 if (isa<NamespaceDecl>(D)) 441 return Tokens; 442 if (DeclsWithoutSemicolons.count(D)) 443 return Tokens; 444 // FIXME: do not consume trailing semicolon on function definitions. 445 // Most declarations own a semicolon in syntax trees, but not in clang AST. 446 return withTrailingSemicolon(Tokens); 447 } 448 449 ArrayRef<syntax::Token> 450 withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { 451 assert(!Tokens.empty()); 452 assert(Tokens.back().kind() != tok::eof); 453 // We never consume 'eof', so looking at the next token is ok. 454 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 455 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 456 return Tokens; 457 } 458 459 void setRole(syntax::Node *N, NodeRole R) { 460 assert(N->role() == NodeRole::Detached); 461 N->setRole(R); 462 } 463 464 /// A collection of trees covering the input tokens. 465 /// When created, each tree corresponds to a single token in the file. 466 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 467 /// node and update the list of trees accordingly. 468 /// 469 /// Ensures that added nodes properly nest and cover the whole token stream. 470 struct Forest { 471 Forest(syntax::Arena &A) { 472 assert(!A.tokenBuffer().expandedTokens().empty()); 473 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 474 // Create all leaf nodes. 475 // Note that we do not have 'eof' in the tree. 476 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 477 auto *L = new (A.allocator()) syntax::Leaf(&T); 478 L->Original = true; 479 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 480 Trees.insert(Trees.end(), {&T, L}); 481 } 482 } 483 484 void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { 485 assert(!Range.empty()); 486 auto It = Trees.lower_bound(Range.begin()); 487 assert(It != Trees.end() && "no node found"); 488 assert(It->first == Range.begin() && "no child with the specified range"); 489 assert((std::next(It) == Trees.end() || 490 std::next(It)->first == Range.end()) && 491 "no child with the specified range"); 492 assert(It->second->role() == NodeRole::Detached && 493 "re-assigning role for a child"); 494 It->second->setRole(Role); 495 } 496 497 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 498 void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, 499 syntax::Tree *Node) { 500 // Attach children to `Node`. 501 assert(Node->firstChild() == nullptr && "node already has children"); 502 503 auto *FirstToken = Tokens.begin(); 504 auto BeginChildren = Trees.lower_bound(FirstToken); 505 506 assert((BeginChildren == Trees.end() || 507 BeginChildren->first == FirstToken) && 508 "fold crosses boundaries of existing subtrees"); 509 auto EndChildren = Trees.lower_bound(Tokens.end()); 510 assert( 511 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 512 "fold crosses boundaries of existing subtrees"); 513 514 // We need to go in reverse order, because we can only prepend. 515 for (auto It = EndChildren; It != BeginChildren; --It) { 516 auto *C = std::prev(It)->second; 517 if (C->role() == NodeRole::Detached) 518 C->setRole(NodeRole::Unknown); 519 Node->prependChildLowLevel(C); 520 } 521 522 // Mark that this node came from the AST and is backed by the source code. 523 Node->Original = true; 524 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 525 526 Trees.erase(BeginChildren, EndChildren); 527 Trees.insert({FirstToken, Node}); 528 } 529 530 // EXPECTS: all tokens were consumed and are owned by a single root node. 531 syntax::Node *finalize() && { 532 assert(Trees.size() == 1); 533 auto *Root = Trees.begin()->second; 534 Trees = {}; 535 return Root; 536 } 537 538 std::string str(const syntax::Arena &A) const { 539 std::string R; 540 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 541 unsigned CoveredTokens = 542 It != Trees.end() 543 ? (std::next(It)->first - It->first) 544 : A.tokenBuffer().expandedTokens().end() - It->first; 545 546 R += std::string( 547 formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 548 It->first->text(A.sourceManager()), CoveredTokens)); 549 R += It->second->dump(A); 550 } 551 return R; 552 } 553 554 private: 555 /// Maps from the start token to a subtree starting at that token. 556 /// Keys in the map are pointers into the array of expanded tokens, so 557 /// pointer order corresponds to the order of preprocessor tokens. 558 std::map<const syntax::Token *, syntax::Node *> Trees; 559 }; 560 561 /// For debugging purposes. 562 std::string str() { return Pending.str(Arena); } 563 564 syntax::Arena &Arena; 565 /// To quickly find tokens by their start location. 566 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 567 LocationToToken; 568 Forest Pending; 569 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 570 ASTToSyntaxMapping Mapping; 571 }; 572 573 namespace { 574 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 575 public: 576 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 577 : Builder(Builder), Context(Context) {} 578 579 bool shouldTraversePostOrder() const { return true; } 580 581 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 582 return processDeclaratorAndDeclaration(DD); 583 } 584 585 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 586 return processDeclaratorAndDeclaration(TD); 587 } 588 589 bool VisitDecl(Decl *D) { 590 assert(!D->isImplicit()); 591 Builder.foldNode(Builder.getDeclarationRange(D), 592 new (allocator()) syntax::UnknownDeclaration(), D); 593 return true; 594 } 595 596 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 597 // override Traverse. 598 // FIXME: make RAV call WalkUpFrom* instead. 599 bool 600 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 601 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 602 return false; 603 if (C->isExplicitSpecialization()) 604 return true; // we are only interested in explicit instantiations. 605 auto *Declaration = 606 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 607 foldExplicitTemplateInstantiation( 608 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 609 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 610 return true; 611 } 612 613 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 614 foldTemplateDeclaration( 615 Builder.getDeclarationRange(S), 616 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 617 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 618 return true; 619 } 620 621 bool WalkUpFromTagDecl(TagDecl *C) { 622 // FIXME: build the ClassSpecifier node. 623 if (!C->isFreeStanding()) { 624 assert(C->getNumTemplateParameterLists() == 0); 625 return true; 626 } 627 handleFreeStandingTagDecl(C); 628 return true; 629 } 630 631 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 632 assert(C->isFreeStanding()); 633 // Class is a declaration specifier and needs a spanning declaration node. 634 auto DeclarationRange = Builder.getDeclarationRange(C); 635 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 636 Builder.foldNode(DeclarationRange, Result, nullptr); 637 638 // Build TemplateDeclaration nodes if we had template parameters. 639 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 640 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 641 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 642 Result = 643 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 644 DeclarationRange = R; 645 }; 646 if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 647 ConsumeTemplateParameters(*S->getTemplateParameters()); 648 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 649 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 650 return Result; 651 } 652 653 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 654 // We do not want to call VisitDecl(), the declaration for translation 655 // unit is built by finalize(). 656 return true; 657 } 658 659 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 660 using NodeRole = syntax::NodeRole; 661 662 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 663 for (auto *Child : S->body()) 664 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 665 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 666 667 Builder.foldNode(Builder.getStmtRange(S), 668 new (allocator()) syntax::CompoundStatement, S); 669 return true; 670 } 671 672 // Some statements are not yet handled by syntax trees. 673 bool WalkUpFromStmt(Stmt *S) { 674 Builder.foldNode(Builder.getStmtRange(S), 675 new (allocator()) syntax::UnknownStatement, S); 676 return true; 677 } 678 679 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 680 // We override to traverse range initializer as VarDecl. 681 // RAV traverses it as a statement, we produce invalid node kinds in that 682 // case. 683 // FIXME: should do this in RAV instead? 684 bool Result = [&, this]() { 685 if (S->getInit() && !TraverseStmt(S->getInit())) 686 return false; 687 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 688 return false; 689 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 690 return false; 691 if (S->getBody() && !TraverseStmt(S->getBody())) 692 return false; 693 return true; 694 }(); 695 WalkUpFromCXXForRangeStmt(S); 696 return Result; 697 } 698 699 bool TraverseStmt(Stmt *S) { 700 if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { 701 // We want to consume the semicolon, make sure SimpleDeclaration does not. 702 for (auto *D : DS->decls()) 703 Builder.noticeDeclWithoutSemicolon(D); 704 } else if (auto *E = dyn_cast_or_null<Expr>(S)) { 705 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 706 } 707 return RecursiveASTVisitor::TraverseStmt(S); 708 } 709 710 // Some expressions are not yet handled by syntax trees. 711 bool WalkUpFromExpr(Expr *E) { 712 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 713 Builder.foldNode(Builder.getExprRange(E), 714 new (allocator()) syntax::UnknownExpression, E); 715 return true; 716 } 717 718 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 719 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 720 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 721 // UDL suffix location does not point to the beginning of a token, so we 722 // can't represent the UDL suffix as a separate syntax tree node. 723 724 return WalkUpFromUserDefinedLiteral(S); 725 } 726 727 syntax::UserDefinedLiteralExpression * 728 buildUserDefinedLiteral(UserDefinedLiteral *S) { 729 switch (S->getLiteralOperatorKind()) { 730 case UserDefinedLiteral::LOK_Integer: 731 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 732 case UserDefinedLiteral::LOK_Floating: 733 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 734 case UserDefinedLiteral::LOK_Character: 735 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 736 case UserDefinedLiteral::LOK_String: 737 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 738 case UserDefinedLiteral::LOK_Raw: 739 case UserDefinedLiteral::LOK_Template: 740 // For raw literal operator and numeric literal operator template we 741 // cannot get the type of the operand in the semantic AST. We get this 742 // information from the token. As integer and floating point have the same 743 // token kind, we run `NumericLiteralParser` again to distinguish them. 744 auto TokLoc = S->getBeginLoc(); 745 auto TokSpelling = 746 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 747 auto Literal = 748 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 749 Context.getLangOpts(), Context.getTargetInfo(), 750 Context.getDiagnostics()); 751 if (Literal.isIntegerLiteral()) 752 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 753 else { 754 assert(Literal.isFloatingLiteral()); 755 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 756 } 757 } 758 llvm_unreachable("Unknown literal operator kind."); 759 } 760 761 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 762 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 763 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 764 return true; 765 } 766 767 // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the 768 // `DependentTemplateSpecializationType` case. 769 /// Given a nested-name-specifier return the range for the last name 770 /// specifier. 771 /// 772 /// e.g. `std::T::template X<U>::` => `template X<U>::` 773 SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { 774 auto SR = NNSLoc.getLocalSourceRange(); 775 776 // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* 777 // return the desired `SourceRange`, but there is a corner case. For a 778 // `DependentTemplateSpecializationType` this method returns its 779 // qualifiers as well, in other words in the example above this method 780 // returns `T::template X<U>::` instead of only `template X<U>::` 781 if (auto TL = NNSLoc.getTypeLoc()) { 782 if (auto DependentTL = 783 TL.getAs<DependentTemplateSpecializationTypeLoc>()) { 784 // The 'template' keyword is always present in dependent template 785 // specializations. Except in the case of incorrect code 786 // TODO: Treat the case of incorrect code. 787 SR.setBegin(DependentTL.getTemplateKeywordLoc()); 788 } 789 } 790 791 return SR; 792 } 793 794 syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { 795 switch (NNS.getKind()) { 796 case NestedNameSpecifier::Global: 797 return syntax::NodeKind::GlobalNameSpecifier; 798 case NestedNameSpecifier::Namespace: 799 case NestedNameSpecifier::NamespaceAlias: 800 case NestedNameSpecifier::Identifier: 801 return syntax::NodeKind::IdentifierNameSpecifier; 802 case NestedNameSpecifier::TypeSpecWithTemplate: 803 return syntax::NodeKind::SimpleTemplateNameSpecifier; 804 case NestedNameSpecifier::TypeSpec: { 805 const auto *NNSType = NNS.getAsType(); 806 assert(NNSType); 807 if (isa<DecltypeType>(NNSType)) 808 return syntax::NodeKind::DecltypeNameSpecifier; 809 if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( 810 NNSType)) 811 return syntax::NodeKind::SimpleTemplateNameSpecifier; 812 return syntax::NodeKind::IdentifierNameSpecifier; 813 } 814 default: 815 // FIXME: Support Microsoft's __super 816 llvm::report_fatal_error("We don't yet support the __super specifier", 817 true); 818 } 819 } 820 821 syntax::NameSpecifier * 822 BuildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { 823 assert(NNSLoc.hasQualifier()); 824 auto NameSpecifierTokens = 825 Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back(); 826 switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) { 827 case syntax::NodeKind::GlobalNameSpecifier: 828 return new (allocator()) syntax::GlobalNameSpecifier; 829 case syntax::NodeKind::IdentifierNameSpecifier: { 830 assert(NameSpecifierTokens.size() == 1); 831 Builder.markChildToken(NameSpecifierTokens.begin(), 832 syntax::NodeRole::Unknown); 833 auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; 834 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 835 return NS; 836 } 837 case syntax::NodeKind::SimpleTemplateNameSpecifier: { 838 // TODO: Build `SimpleTemplateNameSpecifier` children and implement 839 // accessors to them. 840 // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, 841 // some `TypeLoc`s have inside them the previous name specifier and 842 // we want to treat them independently. 843 auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; 844 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 845 return NS; 846 } 847 case syntax::NodeKind::DecltypeNameSpecifier: { 848 const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); 849 if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) 850 return nullptr; 851 auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; 852 // TODO: Implement accessor to `DecltypeNameSpecifier` inner 853 // `DecltypeTypeLoc`. 854 // For that add mapping from `TypeLoc` to `syntax::Node*` then: 855 // Builder.markChild(TypeLoc, syntax::NodeRole); 856 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 857 return NS; 858 } 859 default: 860 llvm_unreachable("getChildKind() does not return this value"); 861 } 862 } 863 864 // To build syntax tree nodes for NestedNameSpecifierLoc we override 865 // Traverse instead of WalkUpFrom because we want to traverse the children 866 // ourselves and build a list instead of a nested tree of name specifier 867 // prefixes. 868 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { 869 if (!QualifierLoc) 870 return true; 871 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 872 auto *NS = BuildNameSpecifier(it); 873 if (!NS) 874 return false; 875 Builder.markChild(NS, syntax::NodeRole::List_element); 876 Builder.markChildToken(it.getEndLoc(), syntax::NodeRole::List_delimiter); 877 } 878 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), 879 new (allocator()) syntax::NestedNameSpecifier, 880 QualifierLoc); 881 return true; 882 } 883 884 syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, 885 SourceLocation TemplateKeywordLoc, 886 SourceRange UnqualifiedIdLoc, 887 ASTPtr From) { 888 if (QualifierLoc) { 889 Builder.markChild(QualifierLoc, syntax::NodeRole::IdExpression_qualifier); 890 if (TemplateKeywordLoc.isValid()) 891 Builder.markChildToken(TemplateKeywordLoc, 892 syntax::NodeRole::TemplateKeyword); 893 } 894 895 auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; 896 Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId, 897 nullptr); 898 Builder.markChild(TheUnqualifiedId, syntax::NodeRole::IdExpression_id); 899 900 auto IdExpressionBeginLoc = 901 QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); 902 903 auto *TheIdExpression = new (allocator()) syntax::IdExpression; 904 Builder.foldNode( 905 Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()), 906 TheIdExpression, From); 907 908 return TheIdExpression; 909 } 910 911 bool WalkUpFromMemberExpr(MemberExpr *S) { 912 // For `MemberExpr` with implicit `this->` we generate a simple 913 // `id-expression` syntax node, beacuse an implicit `member-expression` is 914 // syntactically undistinguishable from an `id-expression` 915 if (S->isImplicitAccess()) { 916 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 917 SourceRange(S->getMemberLoc(), S->getEndLoc()), S); 918 return true; 919 } 920 921 auto *TheIdExpression = buildIdExpression( 922 S->getQualifierLoc(), S->getTemplateKeywordLoc(), 923 SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); 924 925 Builder.markChild(TheIdExpression, 926 syntax::NodeRole::MemberExpression_member); 927 928 Builder.markExprChild(S->getBase(), 929 syntax::NodeRole::MemberExpression_object); 930 Builder.markChildToken(S->getOperatorLoc(), 931 syntax::NodeRole::MemberExpression_accessToken); 932 933 Builder.foldNode(Builder.getExprRange(S), 934 new (allocator()) syntax::MemberExpression, S); 935 return true; 936 } 937 938 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 939 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 940 SourceRange(S->getLocation(), S->getEndLoc()), S); 941 942 return true; 943 } 944 945 // Same logic as DeclRefExpr. 946 bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { 947 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 948 SourceRange(S->getLocation(), S->getEndLoc()), S); 949 950 return true; 951 } 952 953 bool WalkUpFromParenExpr(ParenExpr *S) { 954 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 955 Builder.markExprChild(S->getSubExpr(), 956 syntax::NodeRole::ParenExpression_subExpression); 957 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 958 Builder.foldNode(Builder.getExprRange(S), 959 new (allocator()) syntax::ParenExpression, S); 960 return true; 961 } 962 963 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 964 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 965 Builder.foldNode(Builder.getExprRange(S), 966 new (allocator()) syntax::IntegerLiteralExpression, S); 967 return true; 968 } 969 970 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 971 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 972 Builder.foldNode(Builder.getExprRange(S), 973 new (allocator()) syntax::CharacterLiteralExpression, S); 974 return true; 975 } 976 977 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 978 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 979 Builder.foldNode(Builder.getExprRange(S), 980 new (allocator()) syntax::FloatingLiteralExpression, S); 981 return true; 982 } 983 984 bool WalkUpFromStringLiteral(StringLiteral *S) { 985 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 986 Builder.foldNode(Builder.getExprRange(S), 987 new (allocator()) syntax::StringLiteralExpression, S); 988 return true; 989 } 990 991 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 992 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 993 Builder.foldNode(Builder.getExprRange(S), 994 new (allocator()) syntax::BoolLiteralExpression, S); 995 return true; 996 } 997 998 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 999 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1000 Builder.foldNode(Builder.getExprRange(S), 1001 new (allocator()) syntax::CxxNullPtrExpression, S); 1002 return true; 1003 } 1004 1005 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 1006 Builder.markChildToken(S->getOperatorLoc(), 1007 syntax::NodeRole::OperatorExpression_operatorToken); 1008 Builder.markExprChild(S->getSubExpr(), 1009 syntax::NodeRole::UnaryOperatorExpression_operand); 1010 1011 if (S->isPostfix()) 1012 Builder.foldNode(Builder.getExprRange(S), 1013 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1014 S); 1015 else 1016 Builder.foldNode(Builder.getExprRange(S), 1017 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1018 S); 1019 1020 return true; 1021 } 1022 1023 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 1024 Builder.markExprChild( 1025 S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 1026 Builder.markChildToken(S->getOperatorLoc(), 1027 syntax::NodeRole::OperatorExpression_operatorToken); 1028 Builder.markExprChild( 1029 S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 1030 Builder.foldNode(Builder.getExprRange(S), 1031 new (allocator()) syntax::BinaryOperatorExpression, S); 1032 return true; 1033 } 1034 1035 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1036 // To construct a syntax tree of the same shape for calls to built-in and 1037 // user-defined operators, ignore the `DeclRefExpr` that refers to the 1038 // operator and treat it as a simple token. Do that by traversing 1039 // arguments instead of children. 1040 for (auto *child : S->arguments()) { 1041 // A postfix unary operator is declared as taking two operands. The 1042 // second operand is used to distinguish from its prefix counterpart. In 1043 // the semantic AST this "phantom" operand is represented as a 1044 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 1045 // operand because it does not correspond to anything written in source 1046 // code. 1047 if (child->getSourceRange().isInvalid()) { 1048 assert(getOperatorNodeKind(*S) == 1049 syntax::NodeKind::PostfixUnaryOperatorExpression); 1050 continue; 1051 } 1052 if (!TraverseStmt(child)) 1053 return false; 1054 } 1055 return WalkUpFromCXXOperatorCallExpr(S); 1056 } 1057 1058 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1059 switch (getOperatorNodeKind(*S)) { 1060 case syntax::NodeKind::BinaryOperatorExpression: 1061 Builder.markExprChild( 1062 S->getArg(0), 1063 syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 1064 Builder.markChildToken( 1065 S->getOperatorLoc(), 1066 syntax::NodeRole::OperatorExpression_operatorToken); 1067 Builder.markExprChild( 1068 S->getArg(1), 1069 syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 1070 Builder.foldNode(Builder.getExprRange(S), 1071 new (allocator()) syntax::BinaryOperatorExpression, S); 1072 return true; 1073 case syntax::NodeKind::PrefixUnaryOperatorExpression: 1074 Builder.markChildToken( 1075 S->getOperatorLoc(), 1076 syntax::NodeRole::OperatorExpression_operatorToken); 1077 Builder.markExprChild(S->getArg(0), 1078 syntax::NodeRole::UnaryOperatorExpression_operand); 1079 Builder.foldNode(Builder.getExprRange(S), 1080 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1081 S); 1082 return true; 1083 case syntax::NodeKind::PostfixUnaryOperatorExpression: 1084 Builder.markChildToken( 1085 S->getOperatorLoc(), 1086 syntax::NodeRole::OperatorExpression_operatorToken); 1087 Builder.markExprChild(S->getArg(0), 1088 syntax::NodeRole::UnaryOperatorExpression_operand); 1089 Builder.foldNode(Builder.getExprRange(S), 1090 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1091 S); 1092 return true; 1093 case syntax::NodeKind::UnknownExpression: 1094 return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); 1095 default: 1096 llvm_unreachable("getOperatorNodeKind() does not return this value"); 1097 } 1098 } 1099 1100 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 1101 auto Tokens = Builder.getDeclarationRange(S); 1102 if (Tokens.front().kind() == tok::coloncolon) { 1103 // Handle nested namespace definitions. Those start at '::' token, e.g. 1104 // namespace a^::b {} 1105 // FIXME: build corresponding nodes for the name of this namespace. 1106 return true; 1107 } 1108 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 1109 return true; 1110 } 1111 1112 // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test 1113 // results. Find test coverage or remove it. 1114 bool TraverseParenTypeLoc(ParenTypeLoc L) { 1115 // We reverse order of traversal to get the proper syntax structure. 1116 if (!WalkUpFromParenTypeLoc(L)) 1117 return false; 1118 return TraverseTypeLoc(L.getInnerLoc()); 1119 } 1120 1121 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 1122 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1123 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1124 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 1125 new (allocator()) syntax::ParenDeclarator, L); 1126 return true; 1127 } 1128 1129 // Declarator chunks, they are produced by type locs and some clang::Decls. 1130 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 1131 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 1132 Builder.markExprChild(L.getSizeExpr(), 1133 syntax::NodeRole::ArraySubscript_sizeExpression); 1134 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 1135 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 1136 new (allocator()) syntax::ArraySubscript, L); 1137 return true; 1138 } 1139 1140 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 1141 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1142 for (auto *P : L.getParams()) { 1143 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 1144 } 1145 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1146 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 1147 new (allocator()) syntax::ParametersAndQualifiers, L); 1148 return true; 1149 } 1150 1151 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 1152 if (!L.getTypePtr()->hasTrailingReturn()) 1153 return WalkUpFromFunctionTypeLoc(L); 1154 1155 auto *TrailingReturnTokens = BuildTrailingReturn(L); 1156 // Finish building the node for parameters. 1157 Builder.markChild(TrailingReturnTokens, 1158 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 1159 return WalkUpFromFunctionTypeLoc(L); 1160 } 1161 1162 bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1163 // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds 1164 // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to 1165 // "(Y::*mp)" We thus reverse the order of traversal to get the proper 1166 // syntax structure. 1167 if (!WalkUpFromMemberPointerTypeLoc(L)) 1168 return false; 1169 return TraverseTypeLoc(L.getPointeeLoc()); 1170 } 1171 1172 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1173 auto SR = L.getLocalSourceRange(); 1174 Builder.foldNode(Builder.getRange(SR), 1175 new (allocator()) syntax::MemberPointer, L); 1176 return true; 1177 } 1178 1179 // The code below is very regular, it could even be generated with some 1180 // preprocessor magic. We merely assign roles to the corresponding children 1181 // and fold resulting nodes. 1182 bool WalkUpFromDeclStmt(DeclStmt *S) { 1183 Builder.foldNode(Builder.getStmtRange(S), 1184 new (allocator()) syntax::DeclarationStatement, S); 1185 return true; 1186 } 1187 1188 bool WalkUpFromNullStmt(NullStmt *S) { 1189 Builder.foldNode(Builder.getStmtRange(S), 1190 new (allocator()) syntax::EmptyStatement, S); 1191 return true; 1192 } 1193 1194 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1195 Builder.markChildToken(S->getSwitchLoc(), 1196 syntax::NodeRole::IntroducerKeyword); 1197 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1198 Builder.foldNode(Builder.getStmtRange(S), 1199 new (allocator()) syntax::SwitchStatement, S); 1200 return true; 1201 } 1202 1203 bool WalkUpFromCaseStmt(CaseStmt *S) { 1204 Builder.markChildToken(S->getKeywordLoc(), 1205 syntax::NodeRole::IntroducerKeyword); 1206 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 1207 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1208 Builder.foldNode(Builder.getStmtRange(S), 1209 new (allocator()) syntax::CaseStatement, S); 1210 return true; 1211 } 1212 1213 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1214 Builder.markChildToken(S->getKeywordLoc(), 1215 syntax::NodeRole::IntroducerKeyword); 1216 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1217 Builder.foldNode(Builder.getStmtRange(S), 1218 new (allocator()) syntax::DefaultStatement, S); 1219 return true; 1220 } 1221 1222 bool WalkUpFromIfStmt(IfStmt *S) { 1223 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1224 Builder.markStmtChild(S->getThen(), 1225 syntax::NodeRole::IfStatement_thenStatement); 1226 Builder.markChildToken(S->getElseLoc(), 1227 syntax::NodeRole::IfStatement_elseKeyword); 1228 Builder.markStmtChild(S->getElse(), 1229 syntax::NodeRole::IfStatement_elseStatement); 1230 Builder.foldNode(Builder.getStmtRange(S), 1231 new (allocator()) syntax::IfStatement, S); 1232 return true; 1233 } 1234 1235 bool WalkUpFromForStmt(ForStmt *S) { 1236 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1237 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1238 Builder.foldNode(Builder.getStmtRange(S), 1239 new (allocator()) syntax::ForStatement, S); 1240 return true; 1241 } 1242 1243 bool WalkUpFromWhileStmt(WhileStmt *S) { 1244 Builder.markChildToken(S->getWhileLoc(), 1245 syntax::NodeRole::IntroducerKeyword); 1246 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1247 Builder.foldNode(Builder.getStmtRange(S), 1248 new (allocator()) syntax::WhileStatement, S); 1249 return true; 1250 } 1251 1252 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1253 Builder.markChildToken(S->getContinueLoc(), 1254 syntax::NodeRole::IntroducerKeyword); 1255 Builder.foldNode(Builder.getStmtRange(S), 1256 new (allocator()) syntax::ContinueStatement, S); 1257 return true; 1258 } 1259 1260 bool WalkUpFromBreakStmt(BreakStmt *S) { 1261 Builder.markChildToken(S->getBreakLoc(), 1262 syntax::NodeRole::IntroducerKeyword); 1263 Builder.foldNode(Builder.getStmtRange(S), 1264 new (allocator()) syntax::BreakStatement, S); 1265 return true; 1266 } 1267 1268 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1269 Builder.markChildToken(S->getReturnLoc(), 1270 syntax::NodeRole::IntroducerKeyword); 1271 Builder.markExprChild(S->getRetValue(), 1272 syntax::NodeRole::ReturnStatement_value); 1273 Builder.foldNode(Builder.getStmtRange(S), 1274 new (allocator()) syntax::ReturnStatement, S); 1275 return true; 1276 } 1277 1278 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1279 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1280 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1281 Builder.foldNode(Builder.getStmtRange(S), 1282 new (allocator()) syntax::RangeBasedForStatement, S); 1283 return true; 1284 } 1285 1286 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1287 Builder.foldNode(Builder.getDeclarationRange(S), 1288 new (allocator()) syntax::EmptyDeclaration, S); 1289 return true; 1290 } 1291 1292 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1293 Builder.markExprChild(S->getAssertExpr(), 1294 syntax::NodeRole::StaticAssertDeclaration_condition); 1295 Builder.markExprChild(S->getMessage(), 1296 syntax::NodeRole::StaticAssertDeclaration_message); 1297 Builder.foldNode(Builder.getDeclarationRange(S), 1298 new (allocator()) syntax::StaticAssertDeclaration, S); 1299 return true; 1300 } 1301 1302 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1303 Builder.foldNode(Builder.getDeclarationRange(S), 1304 new (allocator()) syntax::LinkageSpecificationDeclaration, 1305 S); 1306 return true; 1307 } 1308 1309 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1310 Builder.foldNode(Builder.getDeclarationRange(S), 1311 new (allocator()) syntax::NamespaceAliasDefinition, S); 1312 return true; 1313 } 1314 1315 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1316 Builder.foldNode(Builder.getDeclarationRange(S), 1317 new (allocator()) syntax::UsingNamespaceDirective, S); 1318 return true; 1319 } 1320 1321 bool WalkUpFromUsingDecl(UsingDecl *S) { 1322 Builder.foldNode(Builder.getDeclarationRange(S), 1323 new (allocator()) syntax::UsingDeclaration, S); 1324 return true; 1325 } 1326 1327 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1328 Builder.foldNode(Builder.getDeclarationRange(S), 1329 new (allocator()) syntax::UsingDeclaration, S); 1330 return true; 1331 } 1332 1333 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1334 Builder.foldNode(Builder.getDeclarationRange(S), 1335 new (allocator()) syntax::UsingDeclaration, S); 1336 return true; 1337 } 1338 1339 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1340 Builder.foldNode(Builder.getDeclarationRange(S), 1341 new (allocator()) syntax::TypeAliasDeclaration, S); 1342 return true; 1343 } 1344 1345 private: 1346 template <class T> SourceLocation getQualifiedNameStart(T *D) { 1347 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 1348 std::is_base_of<TypedefNameDecl, T>::value), 1349 "only DeclaratorDecl and TypedefNameDecl are supported."); 1350 1351 auto DN = D->getDeclName(); 1352 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 1353 if (IsAnonymous) 1354 return SourceLocation(); 1355 1356 if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { 1357 if (DD->getQualifierLoc()) { 1358 return DD->getQualifierLoc().getBeginLoc(); 1359 } 1360 } 1361 1362 return D->getLocation(); 1363 } 1364 1365 SourceRange getInitializerRange(Decl *D) { 1366 if (auto *V = dyn_cast<VarDecl>(D)) { 1367 auto *I = V->getInit(); 1368 // Initializers in range-based-for are not part of the declarator 1369 if (I && !V->isCXXForRangeDecl()) 1370 return I->getSourceRange(); 1371 } 1372 1373 return SourceRange(); 1374 } 1375 1376 /// Folds SimpleDeclarator node (if present) and in case this is the last 1377 /// declarator in the chain it also folds SimpleDeclaration node. 1378 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1379 SourceRange Initializer = getInitializerRange(D); 1380 auto Range = getDeclaratorRange(Builder.sourceManager(), 1381 D->getTypeSourceInfo()->getTypeLoc(), 1382 getQualifiedNameStart(D), Initializer); 1383 1384 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1385 // declaration, but no declarator). 1386 if (Range.getBegin().isValid()) { 1387 auto *N = new (allocator()) syntax::SimpleDeclarator; 1388 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1389 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 1390 } 1391 1392 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1393 Builder.foldNode(Builder.getDeclarationRange(D), 1394 new (allocator()) syntax::SimpleDeclaration, D); 1395 } 1396 return true; 1397 } 1398 1399 /// Returns the range of the built node. 1400 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 1401 assert(L.getTypePtr()->hasTrailingReturn()); 1402 1403 auto ReturnedType = L.getReturnLoc(); 1404 // Build node for the declarator, if any. 1405 auto ReturnDeclaratorRange = 1406 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 1407 /*Name=*/SourceLocation(), 1408 /*Initializer=*/SourceLocation()); 1409 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1410 if (ReturnDeclaratorRange.isValid()) { 1411 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1412 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1413 ReturnDeclarator, nullptr); 1414 } 1415 1416 // Build node for trailing return type. 1417 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1418 const auto *Arrow = Return.begin() - 1; 1419 assert(Arrow->kind() == tok::arrow); 1420 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1421 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1422 if (ReturnDeclarator) 1423 Builder.markChild(ReturnDeclarator, 1424 syntax::NodeRole::TrailingReturnType_declarator); 1425 auto *R = new (allocator()) syntax::TrailingReturnType; 1426 Builder.foldNode(Tokens, R, L); 1427 return R; 1428 } 1429 1430 void foldExplicitTemplateInstantiation( 1431 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1432 const syntax::Token *TemplateKW, 1433 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1434 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1435 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1436 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1437 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1438 Builder.markChild( 1439 InnerDeclaration, 1440 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 1441 Builder.foldNode( 1442 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1443 } 1444 1445 syntax::TemplateDeclaration *foldTemplateDeclaration( 1446 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1447 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1448 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1449 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1450 1451 auto *N = new (allocator()) syntax::TemplateDeclaration; 1452 Builder.foldNode(Range, N, From); 1453 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 1454 return N; 1455 } 1456 1457 /// A small helper to save some typing. 1458 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1459 1460 syntax::TreeBuilder &Builder; 1461 const ASTContext &Context; 1462 }; 1463 } // namespace 1464 1465 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1466 DeclsWithoutSemicolons.insert(D); 1467 } 1468 1469 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1470 if (Loc.isInvalid()) 1471 return; 1472 Pending.assignRole(*findToken(Loc), Role); 1473 } 1474 1475 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1476 if (!T) 1477 return; 1478 Pending.assignRole(*T, R); 1479 } 1480 1481 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1482 assert(N); 1483 setRole(N, R); 1484 } 1485 1486 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1487 auto *SN = Mapping.find(N); 1488 assert(SN != nullptr); 1489 setRole(SN, R); 1490 } 1491 void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { 1492 auto *SN = Mapping.find(NNSLoc); 1493 assert(SN != nullptr); 1494 setRole(SN, R); 1495 } 1496 1497 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1498 if (!Child) 1499 return; 1500 1501 syntax::Tree *ChildNode; 1502 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1503 // This is an expression in a statement position, consume the trailing 1504 // semicolon and form an 'ExpressionStatement' node. 1505 markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); 1506 ChildNode = new (allocator()) syntax::ExpressionStatement; 1507 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1508 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1509 } else { 1510 ChildNode = Mapping.find(Child); 1511 } 1512 assert(ChildNode != nullptr); 1513 setRole(ChildNode, Role); 1514 } 1515 1516 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1517 if (!Child) 1518 return; 1519 Child = Child->IgnoreImplicit(); 1520 1521 syntax::Tree *ChildNode = Mapping.find(Child); 1522 assert(ChildNode != nullptr); 1523 setRole(ChildNode, Role); 1524 } 1525 1526 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1527 if (L.isInvalid()) 1528 return nullptr; 1529 auto It = LocationToToken.find(L.getRawEncoding()); 1530 assert(It != LocationToToken.end()); 1531 return It->second; 1532 } 1533 1534 syntax::TranslationUnit * 1535 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1536 TreeBuilder Builder(A); 1537 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1538 return std::move(Builder).finalize(); 1539 } 1540