1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Subscript: 188 case OO_Arrow: 189 return syntax::NodeKind::UnknownExpression; 190 case OO_Call: 191 return syntax::NodeKind::CallExpression; 192 case OO_Conditional: // not overloadable 193 case NUM_OVERLOADED_OPERATORS: 194 case OO_None: 195 llvm_unreachable("Not an overloadable operator"); 196 } 197 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 198 } 199 200 /// Gets the range of declarator as defined by the C++ grammar. E.g. 201 /// `int a;` -> range of `a`, 202 /// `int *a;` -> range of `*a`, 203 /// `int a[10];` -> range of `a[10]`, 204 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 205 /// `int *a = nullptr` -> range of `*a = nullptr`. 206 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 207 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 208 SourceLocation Name, 209 SourceRange Initializer) { 210 SourceLocation Start = GetStartLoc().Visit(T); 211 SourceLocation End = T.getSourceRange().getEnd(); 212 assert(End.isValid()); 213 if (Name.isValid()) { 214 if (Start.isInvalid()) 215 Start = Name; 216 if (SM.isBeforeInTranslationUnit(End, Name)) 217 End = Name; 218 } 219 if (Initializer.isValid()) { 220 auto InitializerEnd = Initializer.getEnd(); 221 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 222 End == InitializerEnd); 223 End = InitializerEnd; 224 } 225 return SourceRange(Start, End); 226 } 227 228 namespace { 229 /// All AST hierarchy roots that can be represented as pointers. 230 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 231 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 232 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 233 /// FIXME: expose this as public API. 234 class ASTToSyntaxMapping { 235 public: 236 void add(ASTPtr From, syntax::Tree *To) { 237 assert(To != nullptr); 238 assert(!From.isNull()); 239 240 bool Added = Nodes.insert({From, To}).second; 241 (void)Added; 242 assert(Added && "mapping added twice"); 243 } 244 245 void add(NestedNameSpecifierLoc From, syntax::Tree *To) { 246 assert(To != nullptr); 247 assert(From.hasQualifier()); 248 249 bool Added = NNSNodes.insert({From, To}).second; 250 (void)Added; 251 assert(Added && "mapping added twice"); 252 } 253 254 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 255 256 syntax::Tree *find(NestedNameSpecifierLoc P) const { 257 return NNSNodes.lookup(P); 258 } 259 260 private: 261 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 262 llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; 263 }; 264 } // namespace 265 266 /// A helper class for constructing the syntax tree while traversing a clang 267 /// AST. 268 /// 269 /// At each point of the traversal we maintain a list of pending nodes. 270 /// Initially all tokens are added as pending nodes. When processing a clang AST 271 /// node, the clients need to: 272 /// - create a corresponding syntax node, 273 /// - assign roles to all pending child nodes with 'markChild' and 274 /// 'markChildToken', 275 /// - replace the child nodes with the new syntax node in the pending list 276 /// with 'foldNode'. 277 /// 278 /// Note that all children are expected to be processed when building a node. 279 /// 280 /// Call finalize() to finish building the tree and consume the root node. 281 class syntax::TreeBuilder { 282 public: 283 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 284 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 285 LocationToToken.insert({T.location().getRawEncoding(), &T}); 286 } 287 288 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 289 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 290 291 /// Populate children for \p New node, assuming it covers tokens from \p 292 /// Range. 293 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { 294 assert(New); 295 Pending.foldChildren(Arena, Range, New); 296 if (From) 297 Mapping.add(From, New); 298 } 299 300 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { 301 // FIXME: add mapping for TypeLocs 302 foldNode(Range, New, nullptr); 303 } 304 305 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 306 NestedNameSpecifierLoc From) { 307 assert(New); 308 Pending.foldChildren(Arena, Range, New); 309 if (From) 310 Mapping.add(From, New); 311 } 312 313 /// Notifies that we should not consume trailing semicolon when computing 314 /// token range of \p D. 315 void noticeDeclWithoutSemicolon(Decl *D); 316 317 /// Mark the \p Child node with a corresponding \p Role. All marked children 318 /// should be consumed by foldNode. 319 /// When called on expressions (clang::Expr is derived from clang::Stmt), 320 /// wraps expressions into expression statement. 321 void markStmtChild(Stmt *Child, NodeRole Role); 322 /// Should be called for expressions in non-statement position to avoid 323 /// wrapping into expression statement. 324 void markExprChild(Expr *Child, NodeRole Role); 325 /// Set role for a token starting at \p Loc. 326 void markChildToken(SourceLocation Loc, NodeRole R); 327 /// Set role for \p T. 328 void markChildToken(const syntax::Token *T, NodeRole R); 329 330 /// Set role for \p N. 331 void markChild(syntax::Node *N, NodeRole R); 332 /// Set role for the syntax node matching \p N. 333 void markChild(ASTPtr N, NodeRole R); 334 /// Set role for the syntax node matching \p N. 335 void markChild(NestedNameSpecifierLoc N, NodeRole R); 336 337 /// Finish building the tree and consume the root node. 338 syntax::TranslationUnit *finalize() && { 339 auto Tokens = Arena.tokenBuffer().expandedTokens(); 340 assert(!Tokens.empty()); 341 assert(Tokens.back().kind() == tok::eof); 342 343 // Build the root of the tree, consuming all the children. 344 Pending.foldChildren(Arena, Tokens.drop_back(), 345 new (Arena.allocator()) syntax::TranslationUnit); 346 347 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 348 TU->assertInvariantsRecursive(); 349 return TU; 350 } 351 352 /// Finds a token starting at \p L. The token must exist if \p L is valid. 353 const syntax::Token *findToken(SourceLocation L) const; 354 355 /// Finds the syntax tokens corresponding to the \p SourceRange. 356 ArrayRef<syntax::Token> getRange(SourceRange Range) const { 357 assert(Range.isValid()); 358 return getRange(Range.getBegin(), Range.getEnd()); 359 } 360 361 /// Finds the syntax tokens corresponding to the passed source locations. 362 /// \p First is the start position of the first token and \p Last is the start 363 /// position of the last token. 364 ArrayRef<syntax::Token> getRange(SourceLocation First, 365 SourceLocation Last) const { 366 assert(First.isValid()); 367 assert(Last.isValid()); 368 assert(First == Last || 369 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 370 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 371 } 372 373 ArrayRef<syntax::Token> 374 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 375 auto Tokens = getRange(D->getSourceRange()); 376 return maybeAppendSemicolon(Tokens, D); 377 } 378 379 /// Returns true if \p D is the last declarator in a chain and is thus 380 /// reponsible for creating SimpleDeclaration for the whole chain. 381 template <class T> 382 bool isResponsibleForCreatingDeclaration(const T *D) const { 383 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 384 std::is_base_of<TypedefNameDecl, T>::value), 385 "only DeclaratorDecl and TypedefNameDecl are supported."); 386 387 const Decl *Next = D->getNextDeclInContext(); 388 389 // There's no next sibling, this one is responsible. 390 if (Next == nullptr) { 391 return true; 392 } 393 const auto *NextT = dyn_cast<T>(Next); 394 395 // Next sibling is not the same type, this one is responsible. 396 if (NextT == nullptr) { 397 return true; 398 } 399 // Next sibling doesn't begin at the same loc, it must be a different 400 // declaration, so this declarator is responsible. 401 if (NextT->getBeginLoc() != D->getBeginLoc()) { 402 return true; 403 } 404 405 // NextT is a member of the same declaration, and we need the last member to 406 // create declaration. This one is not responsible. 407 return false; 408 } 409 410 ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 411 ArrayRef<syntax::Token> Tokens; 412 // We want to drop the template parameters for specializations. 413 if (const auto *S = dyn_cast<TagDecl>(D)) 414 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 415 else 416 Tokens = getRange(D->getSourceRange()); 417 return maybeAppendSemicolon(Tokens, D); 418 } 419 420 ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 421 return getRange(E->getSourceRange()); 422 } 423 424 /// Find the adjusted range for the statement, consuming the trailing 425 /// semicolon when needed. 426 ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 427 auto Tokens = getRange(S->getSourceRange()); 428 if (isa<CompoundStmt>(S)) 429 return Tokens; 430 431 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 432 // all statements that end with those. Consume this semicolon here. 433 if (Tokens.back().kind() == tok::semi) 434 return Tokens; 435 return withTrailingSemicolon(Tokens); 436 } 437 438 private: 439 ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, 440 const Decl *D) const { 441 if (isa<NamespaceDecl>(D)) 442 return Tokens; 443 if (DeclsWithoutSemicolons.count(D)) 444 return Tokens; 445 // FIXME: do not consume trailing semicolon on function definitions. 446 // Most declarations own a semicolon in syntax trees, but not in clang AST. 447 return withTrailingSemicolon(Tokens); 448 } 449 450 ArrayRef<syntax::Token> 451 withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { 452 assert(!Tokens.empty()); 453 assert(Tokens.back().kind() != tok::eof); 454 // We never consume 'eof', so looking at the next token is ok. 455 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 456 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 457 return Tokens; 458 } 459 460 void setRole(syntax::Node *N, NodeRole R) { 461 assert(N->role() == NodeRole::Detached); 462 N->setRole(R); 463 } 464 465 /// A collection of trees covering the input tokens. 466 /// When created, each tree corresponds to a single token in the file. 467 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 468 /// node and update the list of trees accordingly. 469 /// 470 /// Ensures that added nodes properly nest and cover the whole token stream. 471 struct Forest { 472 Forest(syntax::Arena &A) { 473 assert(!A.tokenBuffer().expandedTokens().empty()); 474 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 475 // Create all leaf nodes. 476 // Note that we do not have 'eof' in the tree. 477 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 478 auto *L = new (A.allocator()) syntax::Leaf(&T); 479 L->Original = true; 480 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 481 Trees.insert(Trees.end(), {&T, L}); 482 } 483 } 484 485 void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { 486 assert(!Range.empty()); 487 auto It = Trees.lower_bound(Range.begin()); 488 assert(It != Trees.end() && "no node found"); 489 assert(It->first == Range.begin() && "no child with the specified range"); 490 assert((std::next(It) == Trees.end() || 491 std::next(It)->first == Range.end()) && 492 "no child with the specified range"); 493 assert(It->second->role() == NodeRole::Detached && 494 "re-assigning role for a child"); 495 It->second->setRole(Role); 496 } 497 498 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 499 void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, 500 syntax::Tree *Node) { 501 // Attach children to `Node`. 502 assert(Node->firstChild() == nullptr && "node already has children"); 503 504 auto *FirstToken = Tokens.begin(); 505 auto BeginChildren = Trees.lower_bound(FirstToken); 506 507 assert((BeginChildren == Trees.end() || 508 BeginChildren->first == FirstToken) && 509 "fold crosses boundaries of existing subtrees"); 510 auto EndChildren = Trees.lower_bound(Tokens.end()); 511 assert( 512 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 513 "fold crosses boundaries of existing subtrees"); 514 515 // We need to go in reverse order, because we can only prepend. 516 for (auto It = EndChildren; It != BeginChildren; --It) { 517 auto *C = std::prev(It)->second; 518 if (C->role() == NodeRole::Detached) 519 C->setRole(NodeRole::Unknown); 520 Node->prependChildLowLevel(C); 521 } 522 523 // Mark that this node came from the AST and is backed by the source code. 524 Node->Original = true; 525 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 526 527 Trees.erase(BeginChildren, EndChildren); 528 Trees.insert({FirstToken, Node}); 529 } 530 531 // EXPECTS: all tokens were consumed and are owned by a single root node. 532 syntax::Node *finalize() && { 533 assert(Trees.size() == 1); 534 auto *Root = Trees.begin()->second; 535 Trees = {}; 536 return Root; 537 } 538 539 std::string str(const syntax::Arena &A) const { 540 std::string R; 541 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 542 unsigned CoveredTokens = 543 It != Trees.end() 544 ? (std::next(It)->first - It->first) 545 : A.tokenBuffer().expandedTokens().end() - It->first; 546 547 R += std::string( 548 formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 549 It->first->text(A.sourceManager()), CoveredTokens)); 550 R += It->second->dump(A.sourceManager()); 551 } 552 return R; 553 } 554 555 private: 556 /// Maps from the start token to a subtree starting at that token. 557 /// Keys in the map are pointers into the array of expanded tokens, so 558 /// pointer order corresponds to the order of preprocessor tokens. 559 std::map<const syntax::Token *, syntax::Node *> Trees; 560 }; 561 562 /// For debugging purposes. 563 std::string str() { return Pending.str(Arena); } 564 565 syntax::Arena &Arena; 566 /// To quickly find tokens by their start location. 567 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 568 LocationToToken; 569 Forest Pending; 570 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 571 ASTToSyntaxMapping Mapping; 572 }; 573 574 namespace { 575 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 576 public: 577 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 578 : Builder(Builder), Context(Context) {} 579 580 bool shouldTraversePostOrder() const { return true; } 581 582 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 583 return processDeclaratorAndDeclaration(DD); 584 } 585 586 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 587 return processDeclaratorAndDeclaration(TD); 588 } 589 590 bool VisitDecl(Decl *D) { 591 assert(!D->isImplicit()); 592 Builder.foldNode(Builder.getDeclarationRange(D), 593 new (allocator()) syntax::UnknownDeclaration(), D); 594 return true; 595 } 596 597 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 598 // override Traverse. 599 // FIXME: make RAV call WalkUpFrom* instead. 600 bool 601 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 602 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 603 return false; 604 if (C->isExplicitSpecialization()) 605 return true; // we are only interested in explicit instantiations. 606 auto *Declaration = 607 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 608 foldExplicitTemplateInstantiation( 609 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 610 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 611 return true; 612 } 613 614 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 615 foldTemplateDeclaration( 616 Builder.getDeclarationRange(S), 617 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 618 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 619 return true; 620 } 621 622 bool WalkUpFromTagDecl(TagDecl *C) { 623 // FIXME: build the ClassSpecifier node. 624 if (!C->isFreeStanding()) { 625 assert(C->getNumTemplateParameterLists() == 0); 626 return true; 627 } 628 handleFreeStandingTagDecl(C); 629 return true; 630 } 631 632 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 633 assert(C->isFreeStanding()); 634 // Class is a declaration specifier and needs a spanning declaration node. 635 auto DeclarationRange = Builder.getDeclarationRange(C); 636 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 637 Builder.foldNode(DeclarationRange, Result, nullptr); 638 639 // Build TemplateDeclaration nodes if we had template parameters. 640 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 641 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 642 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 643 Result = 644 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 645 DeclarationRange = R; 646 }; 647 if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 648 ConsumeTemplateParameters(*S->getTemplateParameters()); 649 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 650 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 651 return Result; 652 } 653 654 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 655 // We do not want to call VisitDecl(), the declaration for translation 656 // unit is built by finalize(). 657 return true; 658 } 659 660 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 661 using NodeRole = syntax::NodeRole; 662 663 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 664 for (auto *Child : S->body()) 665 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 666 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 667 668 Builder.foldNode(Builder.getStmtRange(S), 669 new (allocator()) syntax::CompoundStatement, S); 670 return true; 671 } 672 673 // Some statements are not yet handled by syntax trees. 674 bool WalkUpFromStmt(Stmt *S) { 675 Builder.foldNode(Builder.getStmtRange(S), 676 new (allocator()) syntax::UnknownStatement, S); 677 return true; 678 } 679 680 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 681 // We override to traverse range initializer as VarDecl. 682 // RAV traverses it as a statement, we produce invalid node kinds in that 683 // case. 684 // FIXME: should do this in RAV instead? 685 bool Result = [&, this]() { 686 if (S->getInit() && !TraverseStmt(S->getInit())) 687 return false; 688 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 689 return false; 690 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 691 return false; 692 if (S->getBody() && !TraverseStmt(S->getBody())) 693 return false; 694 return true; 695 }(); 696 WalkUpFromCXXForRangeStmt(S); 697 return Result; 698 } 699 700 bool TraverseStmt(Stmt *S) { 701 if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { 702 // We want to consume the semicolon, make sure SimpleDeclaration does not. 703 for (auto *D : DS->decls()) 704 Builder.noticeDeclWithoutSemicolon(D); 705 } else if (auto *E = dyn_cast_or_null<Expr>(S)) { 706 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 707 } 708 return RecursiveASTVisitor::TraverseStmt(S); 709 } 710 711 // Some expressions are not yet handled by syntax trees. 712 bool WalkUpFromExpr(Expr *E) { 713 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 714 Builder.foldNode(Builder.getExprRange(E), 715 new (allocator()) syntax::UnknownExpression, E); 716 return true; 717 } 718 719 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 720 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 721 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 722 // UDL suffix location does not point to the beginning of a token, so we 723 // can't represent the UDL suffix as a separate syntax tree node. 724 725 return WalkUpFromUserDefinedLiteral(S); 726 } 727 728 syntax::UserDefinedLiteralExpression * 729 buildUserDefinedLiteral(UserDefinedLiteral *S) { 730 switch (S->getLiteralOperatorKind()) { 731 case UserDefinedLiteral::LOK_Integer: 732 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 733 case UserDefinedLiteral::LOK_Floating: 734 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 735 case UserDefinedLiteral::LOK_Character: 736 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 737 case UserDefinedLiteral::LOK_String: 738 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 739 case UserDefinedLiteral::LOK_Raw: 740 case UserDefinedLiteral::LOK_Template: 741 // For raw literal operator and numeric literal operator template we 742 // cannot get the type of the operand in the semantic AST. We get this 743 // information from the token. As integer and floating point have the same 744 // token kind, we run `NumericLiteralParser` again to distinguish them. 745 auto TokLoc = S->getBeginLoc(); 746 auto TokSpelling = 747 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 748 auto Literal = 749 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 750 Context.getLangOpts(), Context.getTargetInfo(), 751 Context.getDiagnostics()); 752 if (Literal.isIntegerLiteral()) 753 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 754 else { 755 assert(Literal.isFloatingLiteral()); 756 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 757 } 758 } 759 llvm_unreachable("Unknown literal operator kind."); 760 } 761 762 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 763 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 764 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 765 return true; 766 } 767 768 // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the 769 // `DependentTemplateSpecializationType` case. 770 /// Given a nested-name-specifier return the range for the last name 771 /// specifier. 772 /// 773 /// e.g. `std::T::template X<U>::` => `template X<U>::` 774 SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { 775 auto SR = NNSLoc.getLocalSourceRange(); 776 777 // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* 778 // return the desired `SourceRange`, but there is a corner case. For a 779 // `DependentTemplateSpecializationType` this method returns its 780 // qualifiers as well, in other words in the example above this method 781 // returns `T::template X<U>::` instead of only `template X<U>::` 782 if (auto TL = NNSLoc.getTypeLoc()) { 783 if (auto DependentTL = 784 TL.getAs<DependentTemplateSpecializationTypeLoc>()) { 785 // The 'template' keyword is always present in dependent template 786 // specializations. Except in the case of incorrect code 787 // TODO: Treat the case of incorrect code. 788 SR.setBegin(DependentTL.getTemplateKeywordLoc()); 789 } 790 } 791 792 return SR; 793 } 794 795 syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { 796 switch (NNS.getKind()) { 797 case NestedNameSpecifier::Global: 798 return syntax::NodeKind::GlobalNameSpecifier; 799 case NestedNameSpecifier::Namespace: 800 case NestedNameSpecifier::NamespaceAlias: 801 case NestedNameSpecifier::Identifier: 802 return syntax::NodeKind::IdentifierNameSpecifier; 803 case NestedNameSpecifier::TypeSpecWithTemplate: 804 return syntax::NodeKind::SimpleTemplateNameSpecifier; 805 case NestedNameSpecifier::TypeSpec: { 806 const auto *NNSType = NNS.getAsType(); 807 assert(NNSType); 808 if (isa<DecltypeType>(NNSType)) 809 return syntax::NodeKind::DecltypeNameSpecifier; 810 if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( 811 NNSType)) 812 return syntax::NodeKind::SimpleTemplateNameSpecifier; 813 return syntax::NodeKind::IdentifierNameSpecifier; 814 } 815 default: 816 // FIXME: Support Microsoft's __super 817 llvm::report_fatal_error("We don't yet support the __super specifier", 818 true); 819 } 820 } 821 822 syntax::NameSpecifier * 823 BuildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { 824 assert(NNSLoc.hasQualifier()); 825 auto NameSpecifierTokens = 826 Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back(); 827 switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) { 828 case syntax::NodeKind::GlobalNameSpecifier: 829 return new (allocator()) syntax::GlobalNameSpecifier; 830 case syntax::NodeKind::IdentifierNameSpecifier: { 831 assert(NameSpecifierTokens.size() == 1); 832 Builder.markChildToken(NameSpecifierTokens.begin(), 833 syntax::NodeRole::Unknown); 834 auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; 835 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 836 return NS; 837 } 838 case syntax::NodeKind::SimpleTemplateNameSpecifier: { 839 // TODO: Build `SimpleTemplateNameSpecifier` children and implement 840 // accessors to them. 841 // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, 842 // some `TypeLoc`s have inside them the previous name specifier and 843 // we want to treat them independently. 844 auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; 845 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 846 return NS; 847 } 848 case syntax::NodeKind::DecltypeNameSpecifier: { 849 const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); 850 if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) 851 return nullptr; 852 auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; 853 // TODO: Implement accessor to `DecltypeNameSpecifier` inner 854 // `DecltypeTypeLoc`. 855 // For that add mapping from `TypeLoc` to `syntax::Node*` then: 856 // Builder.markChild(TypeLoc, syntax::NodeRole); 857 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 858 return NS; 859 } 860 default: 861 llvm_unreachable("getChildKind() does not return this value"); 862 } 863 } 864 865 // To build syntax tree nodes for NestedNameSpecifierLoc we override 866 // Traverse instead of WalkUpFrom because we want to traverse the children 867 // ourselves and build a list instead of a nested tree of name specifier 868 // prefixes. 869 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { 870 if (!QualifierLoc) 871 return true; 872 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 873 auto *NS = BuildNameSpecifier(it); 874 if (!NS) 875 return false; 876 Builder.markChild(NS, syntax::NodeRole::List_element); 877 Builder.markChildToken(it.getEndLoc(), syntax::NodeRole::List_delimiter); 878 } 879 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), 880 new (allocator()) syntax::NestedNameSpecifier, 881 QualifierLoc); 882 return true; 883 } 884 885 syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, 886 SourceLocation TemplateKeywordLoc, 887 SourceRange UnqualifiedIdLoc, 888 ASTPtr From) { 889 if (QualifierLoc) { 890 Builder.markChild(QualifierLoc, syntax::NodeRole::IdExpression_qualifier); 891 if (TemplateKeywordLoc.isValid()) 892 Builder.markChildToken(TemplateKeywordLoc, 893 syntax::NodeRole::TemplateKeyword); 894 } 895 896 auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; 897 Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId, 898 nullptr); 899 Builder.markChild(TheUnqualifiedId, syntax::NodeRole::IdExpression_id); 900 901 auto IdExpressionBeginLoc = 902 QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); 903 904 auto *TheIdExpression = new (allocator()) syntax::IdExpression; 905 Builder.foldNode( 906 Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()), 907 TheIdExpression, From); 908 909 return TheIdExpression; 910 } 911 912 bool WalkUpFromMemberExpr(MemberExpr *S) { 913 // For `MemberExpr` with implicit `this->` we generate a simple 914 // `id-expression` syntax node, beacuse an implicit `member-expression` is 915 // syntactically undistinguishable from an `id-expression` 916 if (S->isImplicitAccess()) { 917 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 918 SourceRange(S->getMemberLoc(), S->getEndLoc()), S); 919 return true; 920 } 921 922 auto *TheIdExpression = buildIdExpression( 923 S->getQualifierLoc(), S->getTemplateKeywordLoc(), 924 SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); 925 926 Builder.markChild(TheIdExpression, 927 syntax::NodeRole::MemberExpression_member); 928 929 Builder.markExprChild(S->getBase(), 930 syntax::NodeRole::MemberExpression_object); 931 Builder.markChildToken(S->getOperatorLoc(), 932 syntax::NodeRole::MemberExpression_accessToken); 933 934 Builder.foldNode(Builder.getExprRange(S), 935 new (allocator()) syntax::MemberExpression, S); 936 return true; 937 } 938 939 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 940 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 941 SourceRange(S->getLocation(), S->getEndLoc()), S); 942 943 return true; 944 } 945 946 // Same logic as DeclRefExpr. 947 bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { 948 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 949 SourceRange(S->getLocation(), S->getEndLoc()), S); 950 951 return true; 952 } 953 954 bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { 955 if (!S->isImplicit()) { 956 Builder.markChildToken(S->getLocation(), 957 syntax::NodeRole::IntroducerKeyword); 958 Builder.foldNode(Builder.getExprRange(S), 959 new (allocator()) syntax::ThisExpression, S); 960 } 961 return true; 962 } 963 964 bool WalkUpFromParenExpr(ParenExpr *S) { 965 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 966 Builder.markExprChild(S->getSubExpr(), 967 syntax::NodeRole::ParenExpression_subExpression); 968 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 969 Builder.foldNode(Builder.getExprRange(S), 970 new (allocator()) syntax::ParenExpression, S); 971 return true; 972 } 973 974 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 975 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 976 Builder.foldNode(Builder.getExprRange(S), 977 new (allocator()) syntax::IntegerLiteralExpression, S); 978 return true; 979 } 980 981 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 982 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 983 Builder.foldNode(Builder.getExprRange(S), 984 new (allocator()) syntax::CharacterLiteralExpression, S); 985 return true; 986 } 987 988 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 989 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 990 Builder.foldNode(Builder.getExprRange(S), 991 new (allocator()) syntax::FloatingLiteralExpression, S); 992 return true; 993 } 994 995 bool WalkUpFromStringLiteral(StringLiteral *S) { 996 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 997 Builder.foldNode(Builder.getExprRange(S), 998 new (allocator()) syntax::StringLiteralExpression, S); 999 return true; 1000 } 1001 1002 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 1003 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1004 Builder.foldNode(Builder.getExprRange(S), 1005 new (allocator()) syntax::BoolLiteralExpression, S); 1006 return true; 1007 } 1008 1009 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 1010 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1011 Builder.foldNode(Builder.getExprRange(S), 1012 new (allocator()) syntax::CxxNullPtrExpression, S); 1013 return true; 1014 } 1015 1016 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 1017 Builder.markChildToken(S->getOperatorLoc(), 1018 syntax::NodeRole::OperatorExpression_operatorToken); 1019 Builder.markExprChild(S->getSubExpr(), 1020 syntax::NodeRole::UnaryOperatorExpression_operand); 1021 1022 if (S->isPostfix()) 1023 Builder.foldNode(Builder.getExprRange(S), 1024 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1025 S); 1026 else 1027 Builder.foldNode(Builder.getExprRange(S), 1028 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1029 S); 1030 1031 return true; 1032 } 1033 1034 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 1035 Builder.markExprChild( 1036 S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 1037 Builder.markChildToken(S->getOperatorLoc(), 1038 syntax::NodeRole::OperatorExpression_operatorToken); 1039 Builder.markExprChild( 1040 S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 1041 Builder.foldNode(Builder.getExprRange(S), 1042 new (allocator()) syntax::BinaryOperatorExpression, S); 1043 return true; 1044 } 1045 1046 syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) { 1047 for (const auto &Arg : Args) { 1048 Builder.markExprChild(Arg, syntax::NodeRole::List_element); 1049 const auto *DelimiterToken = 1050 std::next(Builder.findToken(Arg->getEndLoc())); 1051 if (DelimiterToken->kind() == clang::tok::TokenKind::comma) 1052 Builder.markChildToken(DelimiterToken, 1053 syntax::NodeRole::List_delimiter); 1054 } 1055 1056 auto *Arguments = new (allocator()) syntax::CallArguments; 1057 if (!Args.empty()) 1058 Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(), 1059 (*(Args.end() - 1))->getEndLoc()), 1060 Arguments, nullptr); 1061 1062 return Arguments; 1063 } 1064 1065 bool WalkUpFromCallExpr(CallExpr *S) { 1066 Builder.markExprChild(S->getCallee(), 1067 syntax::NodeRole::CallExpression_callee); 1068 1069 const auto *LParenToken = 1070 std::next(Builder.findToken(S->getCallee()->getEndLoc())); 1071 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed 1072 // the test on decltype desctructors. 1073 if (LParenToken->kind() == clang::tok::l_paren) 1074 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1075 1076 Builder.markChild(buildCallArguments(S->arguments()), 1077 syntax::NodeRole::CallExpression_arguments); 1078 1079 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1080 1081 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1082 new (allocator()) syntax::CallExpression, S); 1083 return true; 1084 } 1085 1086 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1087 // To construct a syntax tree of the same shape for calls to built-in and 1088 // user-defined operators, ignore the `DeclRefExpr` that refers to the 1089 // operator and treat it as a simple token. Do that by traversing 1090 // arguments instead of children. 1091 for (auto *child : S->arguments()) { 1092 // A postfix unary operator is declared as taking two operands. The 1093 // second operand is used to distinguish from its prefix counterpart. In 1094 // the semantic AST this "phantom" operand is represented as a 1095 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 1096 // operand because it does not correspond to anything written in source 1097 // code. 1098 if (child->getSourceRange().isInvalid()) { 1099 assert(getOperatorNodeKind(*S) == 1100 syntax::NodeKind::PostfixUnaryOperatorExpression); 1101 continue; 1102 } 1103 if (!TraverseStmt(child)) 1104 return false; 1105 } 1106 return WalkUpFromCXXOperatorCallExpr(S); 1107 } 1108 1109 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1110 switch (getOperatorNodeKind(*S)) { 1111 case syntax::NodeKind::BinaryOperatorExpression: 1112 Builder.markExprChild( 1113 S->getArg(0), 1114 syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 1115 Builder.markChildToken( 1116 S->getOperatorLoc(), 1117 syntax::NodeRole::OperatorExpression_operatorToken); 1118 Builder.markExprChild( 1119 S->getArg(1), 1120 syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 1121 Builder.foldNode(Builder.getExprRange(S), 1122 new (allocator()) syntax::BinaryOperatorExpression, S); 1123 return true; 1124 case syntax::NodeKind::PrefixUnaryOperatorExpression: 1125 Builder.markChildToken( 1126 S->getOperatorLoc(), 1127 syntax::NodeRole::OperatorExpression_operatorToken); 1128 Builder.markExprChild(S->getArg(0), 1129 syntax::NodeRole::UnaryOperatorExpression_operand); 1130 Builder.foldNode(Builder.getExprRange(S), 1131 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1132 S); 1133 return true; 1134 case syntax::NodeKind::PostfixUnaryOperatorExpression: 1135 Builder.markChildToken( 1136 S->getOperatorLoc(), 1137 syntax::NodeRole::OperatorExpression_operatorToken); 1138 Builder.markExprChild(S->getArg(0), 1139 syntax::NodeRole::UnaryOperatorExpression_operand); 1140 Builder.foldNode(Builder.getExprRange(S), 1141 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1142 S); 1143 return true; 1144 case syntax::NodeKind::CallExpression: { 1145 Builder.markExprChild(S->getArg(0), 1146 syntax::NodeRole::CallExpression_callee); 1147 1148 const auto *LParenToken = 1149 std::next(Builder.findToken(S->getArg(0)->getEndLoc())); 1150 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have 1151 // fixed the test on decltype desctructors. 1152 if (LParenToken->kind() == clang::tok::l_paren) 1153 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1154 1155 Builder.markChild(buildCallArguments(CallExpr::arg_range( 1156 S->arg_begin() + 1, S->arg_end())), 1157 syntax::NodeRole::CallExpression_arguments); 1158 1159 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1160 1161 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1162 new (allocator()) syntax::CallExpression, S); 1163 return true; 1164 } 1165 case syntax::NodeKind::UnknownExpression: 1166 return WalkUpFromExpr(S); 1167 default: 1168 llvm_unreachable("getOperatorNodeKind() does not return this value"); 1169 } 1170 } 1171 1172 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 1173 auto Tokens = Builder.getDeclarationRange(S); 1174 if (Tokens.front().kind() == tok::coloncolon) { 1175 // Handle nested namespace definitions. Those start at '::' token, e.g. 1176 // namespace a^::b {} 1177 // FIXME: build corresponding nodes for the name of this namespace. 1178 return true; 1179 } 1180 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 1181 return true; 1182 } 1183 1184 // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test 1185 // results. Find test coverage or remove it. 1186 bool TraverseParenTypeLoc(ParenTypeLoc L) { 1187 // We reverse order of traversal to get the proper syntax structure. 1188 if (!WalkUpFromParenTypeLoc(L)) 1189 return false; 1190 return TraverseTypeLoc(L.getInnerLoc()); 1191 } 1192 1193 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 1194 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1195 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1196 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 1197 new (allocator()) syntax::ParenDeclarator, L); 1198 return true; 1199 } 1200 1201 // Declarator chunks, they are produced by type locs and some clang::Decls. 1202 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 1203 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 1204 Builder.markExprChild(L.getSizeExpr(), 1205 syntax::NodeRole::ArraySubscript_sizeExpression); 1206 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 1207 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 1208 new (allocator()) syntax::ArraySubscript, L); 1209 return true; 1210 } 1211 1212 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 1213 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1214 for (auto *P : L.getParams()) { 1215 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 1216 } 1217 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1218 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 1219 new (allocator()) syntax::ParametersAndQualifiers, L); 1220 return true; 1221 } 1222 1223 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 1224 if (!L.getTypePtr()->hasTrailingReturn()) 1225 return WalkUpFromFunctionTypeLoc(L); 1226 1227 auto *TrailingReturnTokens = BuildTrailingReturn(L); 1228 // Finish building the node for parameters. 1229 Builder.markChild(TrailingReturnTokens, 1230 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 1231 return WalkUpFromFunctionTypeLoc(L); 1232 } 1233 1234 bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1235 // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds 1236 // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to 1237 // "(Y::*mp)" We thus reverse the order of traversal to get the proper 1238 // syntax structure. 1239 if (!WalkUpFromMemberPointerTypeLoc(L)) 1240 return false; 1241 return TraverseTypeLoc(L.getPointeeLoc()); 1242 } 1243 1244 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1245 auto SR = L.getLocalSourceRange(); 1246 Builder.foldNode(Builder.getRange(SR), 1247 new (allocator()) syntax::MemberPointer, L); 1248 return true; 1249 } 1250 1251 // The code below is very regular, it could even be generated with some 1252 // preprocessor magic. We merely assign roles to the corresponding children 1253 // and fold resulting nodes. 1254 bool WalkUpFromDeclStmt(DeclStmt *S) { 1255 Builder.foldNode(Builder.getStmtRange(S), 1256 new (allocator()) syntax::DeclarationStatement, S); 1257 return true; 1258 } 1259 1260 bool WalkUpFromNullStmt(NullStmt *S) { 1261 Builder.foldNode(Builder.getStmtRange(S), 1262 new (allocator()) syntax::EmptyStatement, S); 1263 return true; 1264 } 1265 1266 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1267 Builder.markChildToken(S->getSwitchLoc(), 1268 syntax::NodeRole::IntroducerKeyword); 1269 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1270 Builder.foldNode(Builder.getStmtRange(S), 1271 new (allocator()) syntax::SwitchStatement, S); 1272 return true; 1273 } 1274 1275 bool WalkUpFromCaseStmt(CaseStmt *S) { 1276 Builder.markChildToken(S->getKeywordLoc(), 1277 syntax::NodeRole::IntroducerKeyword); 1278 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 1279 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1280 Builder.foldNode(Builder.getStmtRange(S), 1281 new (allocator()) syntax::CaseStatement, S); 1282 return true; 1283 } 1284 1285 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1286 Builder.markChildToken(S->getKeywordLoc(), 1287 syntax::NodeRole::IntroducerKeyword); 1288 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1289 Builder.foldNode(Builder.getStmtRange(S), 1290 new (allocator()) syntax::DefaultStatement, S); 1291 return true; 1292 } 1293 1294 bool WalkUpFromIfStmt(IfStmt *S) { 1295 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1296 Builder.markStmtChild(S->getThen(), 1297 syntax::NodeRole::IfStatement_thenStatement); 1298 Builder.markChildToken(S->getElseLoc(), 1299 syntax::NodeRole::IfStatement_elseKeyword); 1300 Builder.markStmtChild(S->getElse(), 1301 syntax::NodeRole::IfStatement_elseStatement); 1302 Builder.foldNode(Builder.getStmtRange(S), 1303 new (allocator()) syntax::IfStatement, S); 1304 return true; 1305 } 1306 1307 bool WalkUpFromForStmt(ForStmt *S) { 1308 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1309 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1310 Builder.foldNode(Builder.getStmtRange(S), 1311 new (allocator()) syntax::ForStatement, S); 1312 return true; 1313 } 1314 1315 bool WalkUpFromWhileStmt(WhileStmt *S) { 1316 Builder.markChildToken(S->getWhileLoc(), 1317 syntax::NodeRole::IntroducerKeyword); 1318 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1319 Builder.foldNode(Builder.getStmtRange(S), 1320 new (allocator()) syntax::WhileStatement, S); 1321 return true; 1322 } 1323 1324 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1325 Builder.markChildToken(S->getContinueLoc(), 1326 syntax::NodeRole::IntroducerKeyword); 1327 Builder.foldNode(Builder.getStmtRange(S), 1328 new (allocator()) syntax::ContinueStatement, S); 1329 return true; 1330 } 1331 1332 bool WalkUpFromBreakStmt(BreakStmt *S) { 1333 Builder.markChildToken(S->getBreakLoc(), 1334 syntax::NodeRole::IntroducerKeyword); 1335 Builder.foldNode(Builder.getStmtRange(S), 1336 new (allocator()) syntax::BreakStatement, S); 1337 return true; 1338 } 1339 1340 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1341 Builder.markChildToken(S->getReturnLoc(), 1342 syntax::NodeRole::IntroducerKeyword); 1343 Builder.markExprChild(S->getRetValue(), 1344 syntax::NodeRole::ReturnStatement_value); 1345 Builder.foldNode(Builder.getStmtRange(S), 1346 new (allocator()) syntax::ReturnStatement, S); 1347 return true; 1348 } 1349 1350 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1351 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1352 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1353 Builder.foldNode(Builder.getStmtRange(S), 1354 new (allocator()) syntax::RangeBasedForStatement, S); 1355 return true; 1356 } 1357 1358 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1359 Builder.foldNode(Builder.getDeclarationRange(S), 1360 new (allocator()) syntax::EmptyDeclaration, S); 1361 return true; 1362 } 1363 1364 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1365 Builder.markExprChild(S->getAssertExpr(), 1366 syntax::NodeRole::StaticAssertDeclaration_condition); 1367 Builder.markExprChild(S->getMessage(), 1368 syntax::NodeRole::StaticAssertDeclaration_message); 1369 Builder.foldNode(Builder.getDeclarationRange(S), 1370 new (allocator()) syntax::StaticAssertDeclaration, S); 1371 return true; 1372 } 1373 1374 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1375 Builder.foldNode(Builder.getDeclarationRange(S), 1376 new (allocator()) syntax::LinkageSpecificationDeclaration, 1377 S); 1378 return true; 1379 } 1380 1381 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1382 Builder.foldNode(Builder.getDeclarationRange(S), 1383 new (allocator()) syntax::NamespaceAliasDefinition, S); 1384 return true; 1385 } 1386 1387 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1388 Builder.foldNode(Builder.getDeclarationRange(S), 1389 new (allocator()) syntax::UsingNamespaceDirective, S); 1390 return true; 1391 } 1392 1393 bool WalkUpFromUsingDecl(UsingDecl *S) { 1394 Builder.foldNode(Builder.getDeclarationRange(S), 1395 new (allocator()) syntax::UsingDeclaration, S); 1396 return true; 1397 } 1398 1399 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1400 Builder.foldNode(Builder.getDeclarationRange(S), 1401 new (allocator()) syntax::UsingDeclaration, S); 1402 return true; 1403 } 1404 1405 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1406 Builder.foldNode(Builder.getDeclarationRange(S), 1407 new (allocator()) syntax::UsingDeclaration, S); 1408 return true; 1409 } 1410 1411 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1412 Builder.foldNode(Builder.getDeclarationRange(S), 1413 new (allocator()) syntax::TypeAliasDeclaration, S); 1414 return true; 1415 } 1416 1417 private: 1418 template <class T> SourceLocation getQualifiedNameStart(T *D) { 1419 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 1420 std::is_base_of<TypedefNameDecl, T>::value), 1421 "only DeclaratorDecl and TypedefNameDecl are supported."); 1422 1423 auto DN = D->getDeclName(); 1424 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 1425 if (IsAnonymous) 1426 return SourceLocation(); 1427 1428 if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { 1429 if (DD->getQualifierLoc()) { 1430 return DD->getQualifierLoc().getBeginLoc(); 1431 } 1432 } 1433 1434 return D->getLocation(); 1435 } 1436 1437 SourceRange getInitializerRange(Decl *D) { 1438 if (auto *V = dyn_cast<VarDecl>(D)) { 1439 auto *I = V->getInit(); 1440 // Initializers in range-based-for are not part of the declarator 1441 if (I && !V->isCXXForRangeDecl()) 1442 return I->getSourceRange(); 1443 } 1444 1445 return SourceRange(); 1446 } 1447 1448 /// Folds SimpleDeclarator node (if present) and in case this is the last 1449 /// declarator in the chain it also folds SimpleDeclaration node. 1450 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1451 SourceRange Initializer = getInitializerRange(D); 1452 auto Range = getDeclaratorRange(Builder.sourceManager(), 1453 D->getTypeSourceInfo()->getTypeLoc(), 1454 getQualifiedNameStart(D), Initializer); 1455 1456 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1457 // declaration, but no declarator). 1458 if (Range.getBegin().isValid()) { 1459 auto *N = new (allocator()) syntax::SimpleDeclarator; 1460 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1461 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 1462 } 1463 1464 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1465 Builder.foldNode(Builder.getDeclarationRange(D), 1466 new (allocator()) syntax::SimpleDeclaration, D); 1467 } 1468 return true; 1469 } 1470 1471 /// Returns the range of the built node. 1472 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 1473 assert(L.getTypePtr()->hasTrailingReturn()); 1474 1475 auto ReturnedType = L.getReturnLoc(); 1476 // Build node for the declarator, if any. 1477 auto ReturnDeclaratorRange = 1478 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 1479 /*Name=*/SourceLocation(), 1480 /*Initializer=*/SourceLocation()); 1481 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1482 if (ReturnDeclaratorRange.isValid()) { 1483 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1484 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1485 ReturnDeclarator, nullptr); 1486 } 1487 1488 // Build node for trailing return type. 1489 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1490 const auto *Arrow = Return.begin() - 1; 1491 assert(Arrow->kind() == tok::arrow); 1492 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1493 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1494 if (ReturnDeclarator) 1495 Builder.markChild(ReturnDeclarator, 1496 syntax::NodeRole::TrailingReturnType_declarator); 1497 auto *R = new (allocator()) syntax::TrailingReturnType; 1498 Builder.foldNode(Tokens, R, L); 1499 return R; 1500 } 1501 1502 void foldExplicitTemplateInstantiation( 1503 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1504 const syntax::Token *TemplateKW, 1505 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1506 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1507 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1508 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1509 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1510 Builder.markChild( 1511 InnerDeclaration, 1512 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 1513 Builder.foldNode( 1514 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1515 } 1516 1517 syntax::TemplateDeclaration *foldTemplateDeclaration( 1518 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1519 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1520 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1521 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1522 1523 auto *N = new (allocator()) syntax::TemplateDeclaration; 1524 Builder.foldNode(Range, N, From); 1525 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 1526 return N; 1527 } 1528 1529 /// A small helper to save some typing. 1530 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1531 1532 syntax::TreeBuilder &Builder; 1533 const ASTContext &Context; 1534 }; 1535 } // namespace 1536 1537 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1538 DeclsWithoutSemicolons.insert(D); 1539 } 1540 1541 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1542 if (Loc.isInvalid()) 1543 return; 1544 Pending.assignRole(*findToken(Loc), Role); 1545 } 1546 1547 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1548 if (!T) 1549 return; 1550 Pending.assignRole(*T, R); 1551 } 1552 1553 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1554 assert(N); 1555 setRole(N, R); 1556 } 1557 1558 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1559 auto *SN = Mapping.find(N); 1560 assert(SN != nullptr); 1561 setRole(SN, R); 1562 } 1563 void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { 1564 auto *SN = Mapping.find(NNSLoc); 1565 assert(SN != nullptr); 1566 setRole(SN, R); 1567 } 1568 1569 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1570 if (!Child) 1571 return; 1572 1573 syntax::Tree *ChildNode; 1574 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1575 // This is an expression in a statement position, consume the trailing 1576 // semicolon and form an 'ExpressionStatement' node. 1577 markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); 1578 ChildNode = new (allocator()) syntax::ExpressionStatement; 1579 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1580 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1581 } else { 1582 ChildNode = Mapping.find(Child); 1583 } 1584 assert(ChildNode != nullptr); 1585 setRole(ChildNode, Role); 1586 } 1587 1588 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1589 if (!Child) 1590 return; 1591 Child = Child->IgnoreImplicit(); 1592 1593 syntax::Tree *ChildNode = Mapping.find(Child); 1594 assert(ChildNode != nullptr); 1595 setRole(ChildNode, Role); 1596 } 1597 1598 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1599 if (L.isInvalid()) 1600 return nullptr; 1601 auto It = LocationToToken.find(L.getRawEncoding()); 1602 assert(It != LocationToToken.end()); 1603 return It->second; 1604 } 1605 1606 syntax::TranslationUnit * 1607 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1608 TreeBuilder Builder(A); 1609 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1610 return std::move(Builder).finalize(); 1611 } 1612