1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Call: 188 case OO_Subscript: 189 case OO_Arrow: 190 return syntax::NodeKind::UnknownExpression; 191 case OO_Conditional: // not overloadable 192 case NUM_OVERLOADED_OPERATORS: 193 case OO_None: 194 llvm_unreachable("Not an overloadable operator"); 195 } 196 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 197 } 198 199 /// Gets the range of declarator as defined by the C++ grammar. E.g. 200 /// `int a;` -> range of `a`, 201 /// `int *a;` -> range of `*a`, 202 /// `int a[10];` -> range of `a[10]`, 203 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 204 /// `int *a = nullptr` -> range of `*a = nullptr`. 205 /// FIMXE: \p Name must be a source range, e.g. for `operator+`. 206 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 207 SourceLocation Name, 208 SourceRange Initializer) { 209 SourceLocation Start = GetStartLoc().Visit(T); 210 SourceLocation End = T.getSourceRange().getEnd(); 211 assert(End.isValid()); 212 if (Name.isValid()) { 213 if (Start.isInvalid()) 214 Start = Name; 215 if (SM.isBeforeInTranslationUnit(End, Name)) 216 End = Name; 217 } 218 if (Initializer.isValid()) { 219 auto InitializerEnd = Initializer.getEnd(); 220 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 221 End == InitializerEnd); 222 End = InitializerEnd; 223 } 224 return SourceRange(Start, End); 225 } 226 227 namespace { 228 /// All AST hierarchy roots that can be represented as pointers. 229 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 230 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 231 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 232 /// FIXME: expose this as public API. 233 class ASTToSyntaxMapping { 234 public: 235 void add(ASTPtr From, syntax::Tree *To) { 236 assert(To != nullptr); 237 assert(!From.isNull()); 238 239 bool Added = Nodes.insert({From, To}).second; 240 (void)Added; 241 assert(Added && "mapping added twice"); 242 } 243 244 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 245 246 private: 247 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 248 }; 249 } // namespace 250 251 /// A helper class for constructing the syntax tree while traversing a clang 252 /// AST. 253 /// 254 /// At each point of the traversal we maintain a list of pending nodes. 255 /// Initially all tokens are added as pending nodes. When processing a clang AST 256 /// node, the clients need to: 257 /// - create a corresponding syntax node, 258 /// - assign roles to all pending child nodes with 'markChild' and 259 /// 'markChildToken', 260 /// - replace the child nodes with the new syntax node in the pending list 261 /// with 'foldNode'. 262 /// 263 /// Note that all children are expected to be processed when building a node. 264 /// 265 /// Call finalize() to finish building the tree and consume the root node. 266 class syntax::TreeBuilder { 267 public: 268 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 269 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 270 LocationToToken.insert({T.location().getRawEncoding(), &T}); 271 } 272 273 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 274 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 275 276 /// Populate children for \p New node, assuming it covers tokens from \p 277 /// Range. 278 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { 279 assert(New); 280 Pending.foldChildren(Arena, Range, New); 281 if (From) 282 Mapping.add(From, New); 283 } 284 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { 285 // FIXME: add mapping for TypeLocs 286 foldNode(Range, New, nullptr); 287 } 288 289 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, 290 NestedNameSpecifierLoc L) { 291 // FIXME: add mapping for NestedNameSpecifierLoc 292 foldNode(Range, New, nullptr); 293 } 294 /// Notifies that we should not consume trailing semicolon when computing 295 /// token range of \p D. 296 void noticeDeclWithoutSemicolon(Decl *D); 297 298 /// Mark the \p Child node with a corresponding \p Role. All marked children 299 /// should be consumed by foldNode. 300 /// When called on expressions (clang::Expr is derived from clang::Stmt), 301 /// wraps expressions into expression statement. 302 void markStmtChild(Stmt *Child, NodeRole Role); 303 /// Should be called for expressions in non-statement position to avoid 304 /// wrapping into expression statement. 305 void markExprChild(Expr *Child, NodeRole Role); 306 /// Set role for a token starting at \p Loc. 307 void markChildToken(SourceLocation Loc, NodeRole R); 308 /// Set role for \p T. 309 void markChildToken(const syntax::Token *T, NodeRole R); 310 311 /// Set role for \p N. 312 void markChild(syntax::Node *N, NodeRole R); 313 /// Set role for the syntax node matching \p N. 314 void markChild(ASTPtr N, NodeRole R); 315 316 /// Finish building the tree and consume the root node. 317 syntax::TranslationUnit *finalize() && { 318 auto Tokens = Arena.tokenBuffer().expandedTokens(); 319 assert(!Tokens.empty()); 320 assert(Tokens.back().kind() == tok::eof); 321 322 // Build the root of the tree, consuming all the children. 323 Pending.foldChildren(Arena, Tokens.drop_back(), 324 new (Arena.allocator()) syntax::TranslationUnit); 325 326 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 327 TU->assertInvariantsRecursive(); 328 return TU; 329 } 330 331 /// Finds a token starting at \p L. The token must exist if \p L is valid. 332 const syntax::Token *findToken(SourceLocation L) const; 333 334 /// Finds the syntax tokens corresponding to the \p SourceRange. 335 ArrayRef<syntax::Token> getRange(SourceRange Range) const { 336 assert(Range.isValid()); 337 return getRange(Range.getBegin(), Range.getEnd()); 338 } 339 340 /// Finds the syntax tokens corresponding to the passed source locations. 341 /// \p First is the start position of the first token and \p Last is the start 342 /// position of the last token. 343 ArrayRef<syntax::Token> getRange(SourceLocation First, 344 SourceLocation Last) const { 345 assert(First.isValid()); 346 assert(Last.isValid()); 347 assert(First == Last || 348 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 349 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 350 } 351 352 ArrayRef<syntax::Token> 353 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 354 auto Tokens = getRange(D->getSourceRange()); 355 return maybeAppendSemicolon(Tokens, D); 356 } 357 358 /// Returns true if \p D is the last declarator in a chain and is thus 359 /// reponsible for creating SimpleDeclaration for the whole chain. 360 template <class T> 361 bool isResponsibleForCreatingDeclaration(const T *D) const { 362 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 363 std::is_base_of<TypedefNameDecl, T>::value), 364 "only DeclaratorDecl and TypedefNameDecl are supported."); 365 366 const Decl *Next = D->getNextDeclInContext(); 367 368 // There's no next sibling, this one is responsible. 369 if (Next == nullptr) { 370 return true; 371 } 372 const auto *NextT = dyn_cast<T>(Next); 373 374 // Next sibling is not the same type, this one is responsible. 375 if (NextT == nullptr) { 376 return true; 377 } 378 // Next sibling doesn't begin at the same loc, it must be a different 379 // declaration, so this declarator is responsible. 380 if (NextT->getBeginLoc() != D->getBeginLoc()) { 381 return true; 382 } 383 384 // NextT is a member of the same declaration, and we need the last member to 385 // create declaration. This one is not responsible. 386 return false; 387 } 388 389 ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 390 ArrayRef<syntax::Token> Tokens; 391 // We want to drop the template parameters for specializations. 392 if (const auto *S = dyn_cast<TagDecl>(D)) 393 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 394 else 395 Tokens = getRange(D->getSourceRange()); 396 return maybeAppendSemicolon(Tokens, D); 397 } 398 399 ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 400 return getRange(E->getSourceRange()); 401 } 402 403 /// Find the adjusted range for the statement, consuming the trailing 404 /// semicolon when needed. 405 ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 406 auto Tokens = getRange(S->getSourceRange()); 407 if (isa<CompoundStmt>(S)) 408 return Tokens; 409 410 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 411 // all statements that end with those. Consume this semicolon here. 412 if (Tokens.back().kind() == tok::semi) 413 return Tokens; 414 return withTrailingSemicolon(Tokens); 415 } 416 417 private: 418 ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, 419 const Decl *D) const { 420 if (isa<NamespaceDecl>(D)) 421 return Tokens; 422 if (DeclsWithoutSemicolons.count(D)) 423 return Tokens; 424 // FIXME: do not consume trailing semicolon on function definitions. 425 // Most declarations own a semicolon in syntax trees, but not in clang AST. 426 return withTrailingSemicolon(Tokens); 427 } 428 429 ArrayRef<syntax::Token> 430 withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { 431 assert(!Tokens.empty()); 432 assert(Tokens.back().kind() != tok::eof); 433 // We never consume 'eof', so looking at the next token is ok. 434 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 435 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 436 return Tokens; 437 } 438 439 void setRole(syntax::Node *N, NodeRole R) { 440 assert(N->role() == NodeRole::Detached); 441 N->setRole(R); 442 } 443 444 /// A collection of trees covering the input tokens. 445 /// When created, each tree corresponds to a single token in the file. 446 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 447 /// node and update the list of trees accordingly. 448 /// 449 /// Ensures that added nodes properly nest and cover the whole token stream. 450 struct Forest { 451 Forest(syntax::Arena &A) { 452 assert(!A.tokenBuffer().expandedTokens().empty()); 453 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 454 // Create all leaf nodes. 455 // Note that we do not have 'eof' in the tree. 456 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 457 auto *L = new (A.allocator()) syntax::Leaf(&T); 458 L->Original = true; 459 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 460 Trees.insert(Trees.end(), {&T, L}); 461 } 462 } 463 464 void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { 465 assert(!Range.empty()); 466 auto It = Trees.lower_bound(Range.begin()); 467 assert(It != Trees.end() && "no node found"); 468 assert(It->first == Range.begin() && "no child with the specified range"); 469 assert((std::next(It) == Trees.end() || 470 std::next(It)->first == Range.end()) && 471 "no child with the specified range"); 472 assert(It->second->role() == NodeRole::Detached && 473 "re-assigning role for a child"); 474 It->second->setRole(Role); 475 } 476 477 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 478 void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, 479 syntax::Tree *Node) { 480 // Attach children to `Node`. 481 assert(Node->firstChild() == nullptr && "node already has children"); 482 483 auto *FirstToken = Tokens.begin(); 484 auto BeginChildren = Trees.lower_bound(FirstToken); 485 486 assert((BeginChildren == Trees.end() || 487 BeginChildren->first == FirstToken) && 488 "fold crosses boundaries of existing subtrees"); 489 auto EndChildren = Trees.lower_bound(Tokens.end()); 490 assert( 491 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 492 "fold crosses boundaries of existing subtrees"); 493 494 // We need to go in reverse order, because we can only prepend. 495 for (auto It = EndChildren; It != BeginChildren; --It) { 496 auto *C = std::prev(It)->second; 497 if (C->role() == NodeRole::Detached) 498 C->setRole(NodeRole::Unknown); 499 Node->prependChildLowLevel(C); 500 } 501 502 // Mark that this node came from the AST and is backed by the source code. 503 Node->Original = true; 504 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 505 506 Trees.erase(BeginChildren, EndChildren); 507 Trees.insert({FirstToken, Node}); 508 } 509 510 // EXPECTS: all tokens were consumed and are owned by a single root node. 511 syntax::Node *finalize() && { 512 assert(Trees.size() == 1); 513 auto *Root = Trees.begin()->second; 514 Trees = {}; 515 return Root; 516 } 517 518 std::string str(const syntax::Arena &A) const { 519 std::string R; 520 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 521 unsigned CoveredTokens = 522 It != Trees.end() 523 ? (std::next(It)->first - It->first) 524 : A.tokenBuffer().expandedTokens().end() - It->first; 525 526 R += std::string( 527 formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 528 It->first->text(A.sourceManager()), CoveredTokens)); 529 R += It->second->dump(A); 530 } 531 return R; 532 } 533 534 private: 535 /// Maps from the start token to a subtree starting at that token. 536 /// Keys in the map are pointers into the array of expanded tokens, so 537 /// pointer order corresponds to the order of preprocessor tokens. 538 std::map<const syntax::Token *, syntax::Node *> Trees; 539 }; 540 541 /// For debugging purposes. 542 std::string str() { return Pending.str(Arena); } 543 544 syntax::Arena &Arena; 545 /// To quickly find tokens by their start location. 546 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 547 LocationToToken; 548 Forest Pending; 549 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 550 ASTToSyntaxMapping Mapping; 551 }; 552 553 namespace { 554 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 555 public: 556 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 557 : Builder(Builder), Context(Context) {} 558 559 bool shouldTraversePostOrder() const { return true; } 560 561 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 562 return processDeclaratorAndDeclaration(DD); 563 } 564 565 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 566 return processDeclaratorAndDeclaration(TD); 567 } 568 569 bool VisitDecl(Decl *D) { 570 assert(!D->isImplicit()); 571 Builder.foldNode(Builder.getDeclarationRange(D), 572 new (allocator()) syntax::UnknownDeclaration(), D); 573 return true; 574 } 575 576 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 577 // override Traverse. 578 // FIXME: make RAV call WalkUpFrom* instead. 579 bool 580 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 581 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 582 return false; 583 if (C->isExplicitSpecialization()) 584 return true; // we are only interested in explicit instantiations. 585 auto *Declaration = 586 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 587 foldExplicitTemplateInstantiation( 588 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 589 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 590 return true; 591 } 592 593 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 594 foldTemplateDeclaration( 595 Builder.getDeclarationRange(S), 596 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 597 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 598 return true; 599 } 600 601 bool WalkUpFromTagDecl(TagDecl *C) { 602 // FIXME: build the ClassSpecifier node. 603 if (!C->isFreeStanding()) { 604 assert(C->getNumTemplateParameterLists() == 0); 605 return true; 606 } 607 handleFreeStandingTagDecl(C); 608 return true; 609 } 610 611 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 612 assert(C->isFreeStanding()); 613 // Class is a declaration specifier and needs a spanning declaration node. 614 auto DeclarationRange = Builder.getDeclarationRange(C); 615 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 616 Builder.foldNode(DeclarationRange, Result, nullptr); 617 618 // Build TemplateDeclaration nodes if we had template parameters. 619 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 620 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 621 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 622 Result = 623 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 624 DeclarationRange = R; 625 }; 626 if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 627 ConsumeTemplateParameters(*S->getTemplateParameters()); 628 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 629 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 630 return Result; 631 } 632 633 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 634 // We do not want to call VisitDecl(), the declaration for translation 635 // unit is built by finalize(). 636 return true; 637 } 638 639 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 640 using NodeRole = syntax::NodeRole; 641 642 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 643 for (auto *Child : S->body()) 644 Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); 645 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 646 647 Builder.foldNode(Builder.getStmtRange(S), 648 new (allocator()) syntax::CompoundStatement, S); 649 return true; 650 } 651 652 // Some statements are not yet handled by syntax trees. 653 bool WalkUpFromStmt(Stmt *S) { 654 Builder.foldNode(Builder.getStmtRange(S), 655 new (allocator()) syntax::UnknownStatement, S); 656 return true; 657 } 658 659 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 660 // We override to traverse range initializer as VarDecl. 661 // RAV traverses it as a statement, we produce invalid node kinds in that 662 // case. 663 // FIXME: should do this in RAV instead? 664 bool Result = [&, this]() { 665 if (S->getInit() && !TraverseStmt(S->getInit())) 666 return false; 667 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 668 return false; 669 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 670 return false; 671 if (S->getBody() && !TraverseStmt(S->getBody())) 672 return false; 673 return true; 674 }(); 675 WalkUpFromCXXForRangeStmt(S); 676 return Result; 677 } 678 679 bool TraverseStmt(Stmt *S) { 680 if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { 681 // We want to consume the semicolon, make sure SimpleDeclaration does not. 682 for (auto *D : DS->decls()) 683 Builder.noticeDeclWithoutSemicolon(D); 684 } else if (auto *E = dyn_cast_or_null<Expr>(S)) { 685 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 686 } 687 return RecursiveASTVisitor::TraverseStmt(S); 688 } 689 690 // Some expressions are not yet handled by syntax trees. 691 bool WalkUpFromExpr(Expr *E) { 692 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 693 Builder.foldNode(Builder.getExprRange(E), 694 new (allocator()) syntax::UnknownExpression, E); 695 return true; 696 } 697 698 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 699 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 700 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 701 // UDL suffix location does not point to the beginning of a token, so we 702 // can't represent the UDL suffix as a separate syntax tree node. 703 704 return WalkUpFromUserDefinedLiteral(S); 705 } 706 707 syntax::UserDefinedLiteralExpression * 708 buildUserDefinedLiteral(UserDefinedLiteral *S) { 709 switch (S->getLiteralOperatorKind()) { 710 case UserDefinedLiteral::LOK_Integer: 711 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 712 case UserDefinedLiteral::LOK_Floating: 713 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 714 case UserDefinedLiteral::LOK_Character: 715 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 716 case UserDefinedLiteral::LOK_String: 717 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 718 case UserDefinedLiteral::LOK_Raw: 719 case UserDefinedLiteral::LOK_Template: 720 // For raw literal operator and numeric literal operator template we 721 // cannot get the type of the operand in the semantic AST. We get this 722 // information from the token. As integer and floating point have the same 723 // token kind, we run `NumericLiteralParser` again to distinguish them. 724 auto TokLoc = S->getBeginLoc(); 725 auto TokSpelling = 726 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 727 auto Literal = 728 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 729 Context.getLangOpts(), Context.getTargetInfo(), 730 Context.getDiagnostics()); 731 if (Literal.isIntegerLiteral()) 732 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 733 else { 734 assert(Literal.isFloatingLiteral()); 735 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 736 } 737 } 738 llvm_unreachable("Unknown literal operator kind."); 739 } 740 741 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 742 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 743 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 744 return true; 745 } 746 747 syntax::NameSpecifier *BuildNameSpecifier(const NestedNameSpecifier &NNS) { 748 switch (NNS.getKind()) { 749 case NestedNameSpecifier::Global: 750 return new (allocator()) syntax::GlobalNameSpecifier; 751 case NestedNameSpecifier::Namespace: 752 case NestedNameSpecifier::NamespaceAlias: 753 case NestedNameSpecifier::Identifier: 754 return new (allocator()) syntax::IdentifierNameSpecifier; 755 case NestedNameSpecifier::TypeSpecWithTemplate: 756 return new (allocator()) syntax::SimpleTemplateNameSpecifier; 757 case NestedNameSpecifier::TypeSpec: { 758 const auto *NNSType = NNS.getAsType(); 759 assert(NNSType); 760 if (isa<DecltypeType>(NNSType)) 761 return new (allocator()) syntax::DecltypeNameSpecifier; 762 if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( 763 NNSType)) 764 return new (allocator()) syntax::SimpleTemplateNameSpecifier; 765 return new (allocator()) syntax::IdentifierNameSpecifier; 766 } 767 case NestedNameSpecifier::Super: 768 // FIXME: Support Microsoft's __super 769 llvm::report_fatal_error("We don't yet support the __super specifier", 770 true); 771 } 772 } 773 774 // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the 775 // `DependentTemplateSpecializationType` case. 776 /// Given a nested-name-specifier return the range for the last name specifier 777 /// 778 /// e.g. `std::T::template X<U>::` => `template X<U>::` 779 SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { 780 auto SR = NNSLoc.getLocalSourceRange(); 781 782 // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* return 783 // the desired `SourceRange`, but there is a corner 784 // case. For a `DependentTemplateSpecializationType` this method returns its 785 // qualifiers as well, in other words in the example above this method 786 // returns `T::template X<U>::` instead of only `template X<U>::` 787 if (auto TL = NNSLoc.getTypeLoc()) { 788 if (auto DependentTL = 789 TL.getAs<DependentTemplateSpecializationTypeLoc>()) { 790 // The 'template' keyword is always present in dependent template 791 // specializations. Except in the case of incorrect code 792 // TODO: Treat the case of incorrect code. 793 SR.setBegin(DependentTL.getTemplateKeywordLoc()); 794 } 795 } 796 797 return SR; 798 } 799 800 syntax::NestedNameSpecifier * 801 BuildNestedNameSpecifier(const NestedNameSpecifierLoc &QualifierLoc) { 802 if (!QualifierLoc) 803 return nullptr; 804 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 805 assert(it.hasQualifier()); 806 auto *NS = BuildNameSpecifier(*it.getNestedNameSpecifier()); 807 assert(NS); 808 if (!isa<syntax::GlobalNameSpecifier>(NS)) 809 Builder.foldNode(Builder.getRange(getLocalSourceRange(it)).drop_back(), 810 NS, it); 811 Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); 812 Builder.markChildToken(it.getEndLoc(), 813 syntax::NodeRole::NestedNameSpecifier_delimiter); 814 } 815 auto *NNS = new (allocator()) syntax::NestedNameSpecifier; 816 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, 817 QualifierLoc); 818 return NNS; 819 } 820 821 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 822 auto *Qualifier = BuildNestedNameSpecifier(S->getQualifierLoc()); 823 if (Qualifier) 824 Builder.markChild(Qualifier, syntax::NodeRole::IdExpression_qualifier); 825 826 auto TemplateKeywordLoc = S->getTemplateKeywordLoc(); 827 if (TemplateKeywordLoc.isValid()) 828 Builder.markChildToken(TemplateKeywordLoc, 829 syntax::NodeRole::TemplateKeyword); 830 831 auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; 832 833 Builder.foldNode(Builder.getRange(S->getLocation(), S->getEndLoc()), 834 unqualifiedId, nullptr); 835 836 Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); 837 838 Builder.foldNode(Builder.getExprRange(S), 839 new (allocator()) syntax::IdExpression, S); 840 return true; 841 } 842 843 // Same logic as DeclRefExpr. 844 bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { 845 auto *Qualifier = BuildNestedNameSpecifier(S->getQualifierLoc()); 846 if (Qualifier) 847 Builder.markChild(Qualifier, syntax::NodeRole::IdExpression_qualifier); 848 849 auto TemplateKeywordLoc = S->getTemplateKeywordLoc(); 850 if (TemplateKeywordLoc.isValid()) 851 Builder.markChildToken(TemplateKeywordLoc, 852 syntax::NodeRole::TemplateKeyword); 853 854 auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; 855 856 Builder.foldNode(Builder.getRange(S->getLocation(), S->getEndLoc()), 857 unqualifiedId, nullptr); 858 859 Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); 860 861 Builder.foldNode(Builder.getExprRange(S), 862 new (allocator()) syntax::IdExpression, S); 863 return true; 864 } 865 866 bool WalkUpFromParenExpr(ParenExpr *S) { 867 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 868 Builder.markExprChild(S->getSubExpr(), 869 syntax::NodeRole::ParenExpression_subExpression); 870 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 871 Builder.foldNode(Builder.getExprRange(S), 872 new (allocator()) syntax::ParenExpression, S); 873 return true; 874 } 875 876 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 877 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 878 Builder.foldNode(Builder.getExprRange(S), 879 new (allocator()) syntax::IntegerLiteralExpression, S); 880 return true; 881 } 882 883 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 884 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 885 Builder.foldNode(Builder.getExprRange(S), 886 new (allocator()) syntax::CharacterLiteralExpression, S); 887 return true; 888 } 889 890 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 891 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 892 Builder.foldNode(Builder.getExprRange(S), 893 new (allocator()) syntax::FloatingLiteralExpression, S); 894 return true; 895 } 896 897 bool WalkUpFromStringLiteral(StringLiteral *S) { 898 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 899 Builder.foldNode(Builder.getExprRange(S), 900 new (allocator()) syntax::StringLiteralExpression, S); 901 return true; 902 } 903 904 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 905 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 906 Builder.foldNode(Builder.getExprRange(S), 907 new (allocator()) syntax::BoolLiteralExpression, S); 908 return true; 909 } 910 911 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 912 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 913 Builder.foldNode(Builder.getExprRange(S), 914 new (allocator()) syntax::CxxNullPtrExpression, S); 915 return true; 916 } 917 918 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 919 Builder.markChildToken(S->getOperatorLoc(), 920 syntax::NodeRole::OperatorExpression_operatorToken); 921 Builder.markExprChild(S->getSubExpr(), 922 syntax::NodeRole::UnaryOperatorExpression_operand); 923 924 if (S->isPostfix()) 925 Builder.foldNode(Builder.getExprRange(S), 926 new (allocator()) syntax::PostfixUnaryOperatorExpression, 927 S); 928 else 929 Builder.foldNode(Builder.getExprRange(S), 930 new (allocator()) syntax::PrefixUnaryOperatorExpression, 931 S); 932 933 return true; 934 } 935 936 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 937 Builder.markExprChild( 938 S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 939 Builder.markChildToken(S->getOperatorLoc(), 940 syntax::NodeRole::OperatorExpression_operatorToken); 941 Builder.markExprChild( 942 S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 943 Builder.foldNode(Builder.getExprRange(S), 944 new (allocator()) syntax::BinaryOperatorExpression, S); 945 return true; 946 } 947 948 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 949 if (getOperatorNodeKind(*S) == 950 syntax::NodeKind::PostfixUnaryOperatorExpression) { 951 // A postfix unary operator is declared as taking two operands. The 952 // second operand is used to distinguish from its prefix counterpart. In 953 // the semantic AST this "phantom" operand is represented as a 954 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 955 // operand because it does not correspond to anything written in source 956 // code 957 for (auto *child : S->children()) { 958 if (child->getSourceRange().isInvalid()) 959 continue; 960 if (!TraverseStmt(child)) 961 return false; 962 } 963 return WalkUpFromCXXOperatorCallExpr(S); 964 } else 965 return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); 966 } 967 968 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 969 switch (getOperatorNodeKind(*S)) { 970 case syntax::NodeKind::BinaryOperatorExpression: 971 Builder.markExprChild( 972 S->getArg(0), 973 syntax::NodeRole::BinaryOperatorExpression_leftHandSide); 974 Builder.markChildToken( 975 S->getOperatorLoc(), 976 syntax::NodeRole::OperatorExpression_operatorToken); 977 Builder.markExprChild( 978 S->getArg(1), 979 syntax::NodeRole::BinaryOperatorExpression_rightHandSide); 980 Builder.foldNode(Builder.getExprRange(S), 981 new (allocator()) syntax::BinaryOperatorExpression, S); 982 return true; 983 case syntax::NodeKind::PrefixUnaryOperatorExpression: 984 Builder.markChildToken( 985 S->getOperatorLoc(), 986 syntax::NodeRole::OperatorExpression_operatorToken); 987 Builder.markExprChild(S->getArg(0), 988 syntax::NodeRole::UnaryOperatorExpression_operand); 989 Builder.foldNode(Builder.getExprRange(S), 990 new (allocator()) syntax::PrefixUnaryOperatorExpression, 991 S); 992 return true; 993 case syntax::NodeKind::PostfixUnaryOperatorExpression: 994 Builder.markChildToken( 995 S->getOperatorLoc(), 996 syntax::NodeRole::OperatorExpression_operatorToken); 997 Builder.markExprChild(S->getArg(0), 998 syntax::NodeRole::UnaryOperatorExpression_operand); 999 Builder.foldNode(Builder.getExprRange(S), 1000 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1001 S); 1002 return true; 1003 case syntax::NodeKind::UnknownExpression: 1004 return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); 1005 default: 1006 llvm_unreachable("getOperatorNodeKind() does not return this value"); 1007 } 1008 } 1009 1010 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 1011 auto Tokens = Builder.getDeclarationRange(S); 1012 if (Tokens.front().kind() == tok::coloncolon) { 1013 // Handle nested namespace definitions. Those start at '::' token, e.g. 1014 // namespace a^::b {} 1015 // FIXME: build corresponding nodes for the name of this namespace. 1016 return true; 1017 } 1018 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 1019 return true; 1020 } 1021 1022 // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test 1023 // results. Find test coverage or remove it. 1024 bool TraverseParenTypeLoc(ParenTypeLoc L) { 1025 // We reverse order of traversal to get the proper syntax structure. 1026 if (!WalkUpFromParenTypeLoc(L)) 1027 return false; 1028 return TraverseTypeLoc(L.getInnerLoc()); 1029 } 1030 1031 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 1032 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1033 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1034 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 1035 new (allocator()) syntax::ParenDeclarator, L); 1036 return true; 1037 } 1038 1039 // Declarator chunks, they are produced by type locs and some clang::Decls. 1040 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 1041 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 1042 Builder.markExprChild(L.getSizeExpr(), 1043 syntax::NodeRole::ArraySubscript_sizeExpression); 1044 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 1045 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 1046 new (allocator()) syntax::ArraySubscript, L); 1047 return true; 1048 } 1049 1050 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 1051 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1052 for (auto *P : L.getParams()) { 1053 Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); 1054 } 1055 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1056 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 1057 new (allocator()) syntax::ParametersAndQualifiers, L); 1058 return true; 1059 } 1060 1061 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 1062 if (!L.getTypePtr()->hasTrailingReturn()) 1063 return WalkUpFromFunctionTypeLoc(L); 1064 1065 auto *TrailingReturnTokens = BuildTrailingReturn(L); 1066 // Finish building the node for parameters. 1067 Builder.markChild(TrailingReturnTokens, 1068 syntax::NodeRole::ParametersAndQualifiers_trailingReturn); 1069 return WalkUpFromFunctionTypeLoc(L); 1070 } 1071 1072 bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1073 // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds 1074 // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to 1075 // "(Y::*mp)" We thus reverse the order of traversal to get the proper 1076 // syntax structure. 1077 if (!WalkUpFromMemberPointerTypeLoc(L)) 1078 return false; 1079 return TraverseTypeLoc(L.getPointeeLoc()); 1080 } 1081 1082 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1083 auto SR = L.getLocalSourceRange(); 1084 Builder.foldNode(Builder.getRange(SR), 1085 new (allocator()) syntax::MemberPointer, L); 1086 return true; 1087 } 1088 1089 // The code below is very regular, it could even be generated with some 1090 // preprocessor magic. We merely assign roles to the corresponding children 1091 // and fold resulting nodes. 1092 bool WalkUpFromDeclStmt(DeclStmt *S) { 1093 Builder.foldNode(Builder.getStmtRange(S), 1094 new (allocator()) syntax::DeclarationStatement, S); 1095 return true; 1096 } 1097 1098 bool WalkUpFromNullStmt(NullStmt *S) { 1099 Builder.foldNode(Builder.getStmtRange(S), 1100 new (allocator()) syntax::EmptyStatement, S); 1101 return true; 1102 } 1103 1104 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1105 Builder.markChildToken(S->getSwitchLoc(), 1106 syntax::NodeRole::IntroducerKeyword); 1107 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1108 Builder.foldNode(Builder.getStmtRange(S), 1109 new (allocator()) syntax::SwitchStatement, S); 1110 return true; 1111 } 1112 1113 bool WalkUpFromCaseStmt(CaseStmt *S) { 1114 Builder.markChildToken(S->getKeywordLoc(), 1115 syntax::NodeRole::IntroducerKeyword); 1116 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); 1117 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1118 Builder.foldNode(Builder.getStmtRange(S), 1119 new (allocator()) syntax::CaseStatement, S); 1120 return true; 1121 } 1122 1123 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1124 Builder.markChildToken(S->getKeywordLoc(), 1125 syntax::NodeRole::IntroducerKeyword); 1126 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1127 Builder.foldNode(Builder.getStmtRange(S), 1128 new (allocator()) syntax::DefaultStatement, S); 1129 return true; 1130 } 1131 1132 bool WalkUpFromIfStmt(IfStmt *S) { 1133 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1134 Builder.markStmtChild(S->getThen(), 1135 syntax::NodeRole::IfStatement_thenStatement); 1136 Builder.markChildToken(S->getElseLoc(), 1137 syntax::NodeRole::IfStatement_elseKeyword); 1138 Builder.markStmtChild(S->getElse(), 1139 syntax::NodeRole::IfStatement_elseStatement); 1140 Builder.foldNode(Builder.getStmtRange(S), 1141 new (allocator()) syntax::IfStatement, S); 1142 return true; 1143 } 1144 1145 bool WalkUpFromForStmt(ForStmt *S) { 1146 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1147 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1148 Builder.foldNode(Builder.getStmtRange(S), 1149 new (allocator()) syntax::ForStatement, S); 1150 return true; 1151 } 1152 1153 bool WalkUpFromWhileStmt(WhileStmt *S) { 1154 Builder.markChildToken(S->getWhileLoc(), 1155 syntax::NodeRole::IntroducerKeyword); 1156 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1157 Builder.foldNode(Builder.getStmtRange(S), 1158 new (allocator()) syntax::WhileStatement, S); 1159 return true; 1160 } 1161 1162 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1163 Builder.markChildToken(S->getContinueLoc(), 1164 syntax::NodeRole::IntroducerKeyword); 1165 Builder.foldNode(Builder.getStmtRange(S), 1166 new (allocator()) syntax::ContinueStatement, S); 1167 return true; 1168 } 1169 1170 bool WalkUpFromBreakStmt(BreakStmt *S) { 1171 Builder.markChildToken(S->getBreakLoc(), 1172 syntax::NodeRole::IntroducerKeyword); 1173 Builder.foldNode(Builder.getStmtRange(S), 1174 new (allocator()) syntax::BreakStatement, S); 1175 return true; 1176 } 1177 1178 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1179 Builder.markChildToken(S->getReturnLoc(), 1180 syntax::NodeRole::IntroducerKeyword); 1181 Builder.markExprChild(S->getRetValue(), 1182 syntax::NodeRole::ReturnStatement_value); 1183 Builder.foldNode(Builder.getStmtRange(S), 1184 new (allocator()) syntax::ReturnStatement, S); 1185 return true; 1186 } 1187 1188 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1189 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1190 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1191 Builder.foldNode(Builder.getStmtRange(S), 1192 new (allocator()) syntax::RangeBasedForStatement, S); 1193 return true; 1194 } 1195 1196 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1197 Builder.foldNode(Builder.getDeclarationRange(S), 1198 new (allocator()) syntax::EmptyDeclaration, S); 1199 return true; 1200 } 1201 1202 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1203 Builder.markExprChild(S->getAssertExpr(), 1204 syntax::NodeRole::StaticAssertDeclaration_condition); 1205 Builder.markExprChild(S->getMessage(), 1206 syntax::NodeRole::StaticAssertDeclaration_message); 1207 Builder.foldNode(Builder.getDeclarationRange(S), 1208 new (allocator()) syntax::StaticAssertDeclaration, S); 1209 return true; 1210 } 1211 1212 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1213 Builder.foldNode(Builder.getDeclarationRange(S), 1214 new (allocator()) syntax::LinkageSpecificationDeclaration, 1215 S); 1216 return true; 1217 } 1218 1219 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1220 Builder.foldNode(Builder.getDeclarationRange(S), 1221 new (allocator()) syntax::NamespaceAliasDefinition, S); 1222 return true; 1223 } 1224 1225 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1226 Builder.foldNode(Builder.getDeclarationRange(S), 1227 new (allocator()) syntax::UsingNamespaceDirective, S); 1228 return true; 1229 } 1230 1231 bool WalkUpFromUsingDecl(UsingDecl *S) { 1232 Builder.foldNode(Builder.getDeclarationRange(S), 1233 new (allocator()) syntax::UsingDeclaration, S); 1234 return true; 1235 } 1236 1237 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1238 Builder.foldNode(Builder.getDeclarationRange(S), 1239 new (allocator()) syntax::UsingDeclaration, S); 1240 return true; 1241 } 1242 1243 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1244 Builder.foldNode(Builder.getDeclarationRange(S), 1245 new (allocator()) syntax::UsingDeclaration, S); 1246 return true; 1247 } 1248 1249 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1250 Builder.foldNode(Builder.getDeclarationRange(S), 1251 new (allocator()) syntax::TypeAliasDeclaration, S); 1252 return true; 1253 } 1254 1255 private: 1256 template <class T> SourceLocation getQualifiedNameStart(T *D) { 1257 static_assert((std::is_base_of<DeclaratorDecl, T>::value || 1258 std::is_base_of<TypedefNameDecl, T>::value), 1259 "only DeclaratorDecl and TypedefNameDecl are supported."); 1260 1261 auto DN = D->getDeclName(); 1262 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 1263 if (IsAnonymous) 1264 return SourceLocation(); 1265 1266 if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { 1267 if (DD->getQualifierLoc()) { 1268 return DD->getQualifierLoc().getBeginLoc(); 1269 } 1270 } 1271 1272 return D->getLocation(); 1273 } 1274 1275 SourceRange getInitializerRange(Decl *D) { 1276 if (auto *V = dyn_cast<VarDecl>(D)) { 1277 auto *I = V->getInit(); 1278 // Initializers in range-based-for are not part of the declarator 1279 if (I && !V->isCXXForRangeDecl()) 1280 return I->getSourceRange(); 1281 } 1282 1283 return SourceRange(); 1284 } 1285 1286 /// Folds SimpleDeclarator node (if present) and in case this is the last 1287 /// declarator in the chain it also folds SimpleDeclaration node. 1288 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1289 SourceRange Initializer = getInitializerRange(D); 1290 auto Range = getDeclaratorRange(Builder.sourceManager(), 1291 D->getTypeSourceInfo()->getTypeLoc(), 1292 getQualifiedNameStart(D), Initializer); 1293 1294 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1295 // declaration, but no declarator). 1296 if (Range.getBegin().isValid()) { 1297 auto *N = new (allocator()) syntax::SimpleDeclarator; 1298 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1299 Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); 1300 } 1301 1302 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1303 Builder.foldNode(Builder.getDeclarationRange(D), 1304 new (allocator()) syntax::SimpleDeclaration, D); 1305 } 1306 return true; 1307 } 1308 1309 /// Returns the range of the built node. 1310 syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { 1311 assert(L.getTypePtr()->hasTrailingReturn()); 1312 1313 auto ReturnedType = L.getReturnLoc(); 1314 // Build node for the declarator, if any. 1315 auto ReturnDeclaratorRange = 1316 getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, 1317 /*Name=*/SourceLocation(), 1318 /*Initializer=*/SourceLocation()); 1319 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1320 if (ReturnDeclaratorRange.isValid()) { 1321 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1322 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1323 ReturnDeclarator, nullptr); 1324 } 1325 1326 // Build node for trailing return type. 1327 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1328 const auto *Arrow = Return.begin() - 1; 1329 assert(Arrow->kind() == tok::arrow); 1330 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1331 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1332 if (ReturnDeclarator) 1333 Builder.markChild(ReturnDeclarator, 1334 syntax::NodeRole::TrailingReturnType_declarator); 1335 auto *R = new (allocator()) syntax::TrailingReturnType; 1336 Builder.foldNode(Tokens, R, L); 1337 return R; 1338 } 1339 1340 void foldExplicitTemplateInstantiation( 1341 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1342 const syntax::Token *TemplateKW, 1343 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1344 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1345 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1346 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1347 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1348 Builder.markChild( 1349 InnerDeclaration, 1350 syntax::NodeRole::ExplicitTemplateInstantiation_declaration); 1351 Builder.foldNode( 1352 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1353 } 1354 1355 syntax::TemplateDeclaration *foldTemplateDeclaration( 1356 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1357 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1358 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1359 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1360 1361 auto *N = new (allocator()) syntax::TemplateDeclaration; 1362 Builder.foldNode(Range, N, From); 1363 Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); 1364 return N; 1365 } 1366 1367 /// A small helper to save some typing. 1368 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1369 1370 syntax::TreeBuilder &Builder; 1371 const ASTContext &Context; 1372 }; 1373 } // namespace 1374 1375 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1376 DeclsWithoutSemicolons.insert(D); 1377 } 1378 1379 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1380 if (Loc.isInvalid()) 1381 return; 1382 Pending.assignRole(*findToken(Loc), Role); 1383 } 1384 1385 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1386 if (!T) 1387 return; 1388 Pending.assignRole(*T, R); 1389 } 1390 1391 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1392 assert(N); 1393 setRole(N, R); 1394 } 1395 1396 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1397 auto *SN = Mapping.find(N); 1398 assert(SN != nullptr); 1399 setRole(SN, R); 1400 } 1401 1402 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1403 if (!Child) 1404 return; 1405 1406 syntax::Tree *ChildNode; 1407 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1408 // This is an expression in a statement position, consume the trailing 1409 // semicolon and form an 'ExpressionStatement' node. 1410 markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); 1411 ChildNode = new (allocator()) syntax::ExpressionStatement; 1412 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1413 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1414 } else { 1415 ChildNode = Mapping.find(Child); 1416 } 1417 assert(ChildNode != nullptr); 1418 setRole(ChildNode, Role); 1419 } 1420 1421 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1422 if (!Child) 1423 return; 1424 Child = Child->IgnoreImplicit(); 1425 1426 syntax::Tree *ChildNode = Mapping.find(Child); 1427 assert(ChildNode != nullptr); 1428 setRole(ChildNode, Role); 1429 } 1430 1431 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1432 if (L.isInvalid()) 1433 return nullptr; 1434 auto It = LocationToToken.find(L.getRawEncoding()); 1435 assert(It != LocationToToken.end()); 1436 return It->second; 1437 } 1438 1439 syntax::TranslationUnit * 1440 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1441 TreeBuilder Builder(A); 1442 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1443 return std::move(Builder).finalize(); 1444 } 1445