1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/Tree.h" 9 #include "clang/Basic/TokenKinds.h" 10 #include "clang/Tooling/Syntax/Nodes.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/Support/Casting.h" 14 #include <cassert> 15 16 using namespace clang; 17 18 namespace { 19 static void traverse(const syntax::Node *N, 20 llvm::function_ref<void(const syntax::Node *)> Visit) { 21 if (auto *T = dyn_cast<syntax::Tree>(N)) { 22 for (const syntax::Node &C : T->getChildren()) 23 traverse(&C, Visit); 24 } 25 Visit(N); 26 } 27 static void traverse(syntax::Node *N, 28 llvm::function_ref<void(syntax::Node *)> Visit) { 29 traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) { 30 Visit(const_cast<syntax::Node *>(N)); 31 }); 32 } 33 } // namespace 34 35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 36 const TokenBuffer &Tokens) 37 : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} 38 39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { 40 return Tokens; 41 } 42 43 std::pair<FileID, ArrayRef<syntax::Token>> 44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) { 45 auto FID = SourceMgr.createFileID(std::move(Input)); 46 auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); 47 assert(It.second && "duplicate FileID"); 48 return {FID, It.first->second}; 49 } 50 51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { 52 assert(Tok != nullptr); 53 } 54 55 bool syntax::Leaf::classof(const Node *N) { 56 return N->getKind() == NodeKind::Leaf; 57 } 58 59 syntax::Node::Node(NodeKind Kind) 60 : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), 61 Kind(static_cast<unsigned>(Kind)), Role(0), Original(false), 62 CanModify(false) { 63 this->setRole(NodeRole::Detached); 64 } 65 66 bool syntax::Node::isDetached() const { 67 return getRole() == NodeRole::Detached; 68 } 69 70 void syntax::Node::setRole(NodeRole NR) { 71 this->Role = static_cast<unsigned>(NR); 72 } 73 74 bool syntax::Tree::classof(const Node *N) { 75 return N->getKind() > NodeKind::Leaf; 76 } 77 78 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) { 79 assert(Child->getRole() == NodeRole::Detached); 80 assert(Role != NodeRole::Detached); 81 82 Child->setRole(Role); 83 appendChildLowLevel(Child); 84 } 85 86 void syntax::Tree::appendChildLowLevel(Node *Child) { 87 assert(Child->Parent == nullptr); 88 assert(Child->NextSibling == nullptr); 89 assert(Child->PreviousSibling == nullptr); 90 assert(Child->getRole() != NodeRole::Detached); 91 92 Child->Parent = this; 93 if (this->LastChild) { 94 Child->PreviousSibling = this->LastChild; 95 this->LastChild->NextSibling = Child; 96 } else 97 this->FirstChild = Child; 98 99 this->LastChild = Child; 100 } 101 102 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { 103 assert(Child->getRole() == NodeRole::Detached); 104 assert(Role != NodeRole::Detached); 105 106 Child->setRole(Role); 107 prependChildLowLevel(Child); 108 } 109 110 void syntax::Tree::prependChildLowLevel(Node *Child) { 111 assert(Child->Parent == nullptr); 112 assert(Child->NextSibling == nullptr); 113 assert(Child->PreviousSibling == nullptr); 114 assert(Child->getRole() != NodeRole::Detached); 115 116 Child->Parent = this; 117 if (this->FirstChild) { 118 Child->NextSibling = this->FirstChild; 119 this->FirstChild->PreviousSibling = Child; 120 } else 121 this->LastChild = Child; 122 123 this->FirstChild = Child; 124 } 125 126 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End, 127 Node *New) { 128 assert((!Begin || Begin->Parent == this) && 129 "`Begin` is not a child of `this`."); 130 assert((!End || End->Parent == this) && "`End` is not a child of `this`."); 131 assert(canModify() && "Cannot modify `this`."); 132 133 #ifndef NDEBUG 134 for (auto *N = New; N; N = N->NextSibling) { 135 assert(N->Parent == nullptr); 136 assert(N->getRole() != NodeRole::Detached && "Roles must be set"); 137 // FIXME: sanity-check the role. 138 } 139 140 auto Reachable = [](Node *From, Node *N) { 141 if (!N) 142 return true; 143 for (auto *It = From; It; It = It->NextSibling) 144 if (It == N) 145 return true; 146 return false; 147 }; 148 assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable."); 149 assert(Reachable(Begin, End) && "`End` is not after `Begin`."); 150 #endif 151 152 if (!New && Begin == End) 153 return; 154 155 // Mark modification. 156 for (auto *T = this; T && T->Original; T = T->Parent) 157 T->Original = false; 158 159 // Save the node before the range to be removed. Later we insert the `New` 160 // range after this node. 161 auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild; 162 163 // Detach old nodes. 164 for (auto *N = Begin; N != End;) { 165 auto *Next = N->NextSibling; 166 167 N->setRole(NodeRole::Detached); 168 N->Parent = nullptr; 169 N->NextSibling = nullptr; 170 N->PreviousSibling = nullptr; 171 if (N->Original) 172 traverse(N, [](Node *C) { C->Original = false; }); 173 174 N = Next; 175 } 176 177 // Attach new range. 178 auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild; 179 auto *&NewLast = End ? End->PreviousSibling : LastChild; 180 181 if (!New) { 182 NewFirst = End; 183 NewLast = BeforeBegin; 184 return; 185 } 186 187 New->PreviousSibling = BeforeBegin; 188 NewFirst = New; 189 190 Node *LastInNew; 191 for (auto *N = New; N != nullptr; N = N->NextSibling) { 192 LastInNew = N; 193 N->Parent = this; 194 } 195 LastInNew->NextSibling = End; 196 NewLast = LastInNew; 197 } 198 199 namespace { 200 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, 201 const SourceManager &SM) { 202 assert(L); 203 const auto *Token = L->getToken(); 204 assert(Token); 205 // Handle 'eof' separately, calling text() on it produces an empty string. 206 if (Token->kind() == tok::eof) 207 OS << "<eof>"; 208 else 209 OS << Token->text(SM); 210 } 211 212 static void dumpNode(raw_ostream &OS, const syntax::Node *N, 213 const SourceManager &SM, std::vector<bool> IndentMask) { 214 auto DumpExtraInfo = [&OS](const syntax::Node *N) { 215 if (N->getRole() != syntax::NodeRole::Unknown) 216 OS << " " << N->getRole(); 217 if (!N->isOriginal()) 218 OS << " synthesized"; 219 if (!N->canModify()) 220 OS << " unmodifiable"; 221 }; 222 223 assert(N); 224 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 225 OS << "'"; 226 dumpLeaf(OS, L, SM); 227 OS << "'"; 228 DumpExtraInfo(N); 229 OS << "\n"; 230 return; 231 } 232 233 const auto *T = cast<syntax::Tree>(N); 234 OS << T->getKind(); 235 DumpExtraInfo(N); 236 OS << "\n"; 237 238 for (const syntax::Node &It : T->getChildren()) { 239 for (bool Filled : IndentMask) { 240 if (Filled) 241 OS << "| "; 242 else 243 OS << " "; 244 } 245 if (!It.getNextSibling()) { 246 OS << "`-"; 247 IndentMask.push_back(false); 248 } else { 249 OS << "|-"; 250 IndentMask.push_back(true); 251 } 252 dumpNode(OS, &It, SM, IndentMask); 253 IndentMask.pop_back(); 254 } 255 } 256 } // namespace 257 258 std::string syntax::Node::dump(const SourceManager &SM) const { 259 std::string Str; 260 llvm::raw_string_ostream OS(Str); 261 dumpNode(OS, this, SM, /*IndentMask=*/{}); 262 return std::move(OS.str()); 263 } 264 265 std::string syntax::Node::dumpTokens(const SourceManager &SM) const { 266 std::string Storage; 267 llvm::raw_string_ostream OS(Storage); 268 traverse(this, [&](const syntax::Node *N) { 269 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 270 dumpLeaf(OS, L, SM); 271 OS << " "; 272 } 273 }); 274 return OS.str(); 275 } 276 277 void syntax::Node::assertInvariants() const { 278 #ifndef NDEBUG 279 if (isDetached()) 280 assert(getParent() == nullptr); 281 else 282 assert(getParent() != nullptr); 283 284 const auto *T = dyn_cast<Tree>(this); 285 if (!T) 286 return; 287 for (const Node &C : T->getChildren()) { 288 if (T->isOriginal()) 289 assert(C.isOriginal()); 290 assert(!C.isDetached()); 291 assert(C.getParent() == T); 292 const auto *Next = C.getNextSibling(); 293 assert(!Next || &C == Next->getPreviousSibling()); 294 if (!C.getNextSibling()) 295 assert(&C == T->getLastChild() && 296 "Last child is reachable by advancing from the first child."); 297 } 298 299 const auto *L = dyn_cast<List>(T); 300 if (!L) 301 return; 302 for (const Node &C : T->getChildren()) { 303 assert(C.getRole() == NodeRole::ListElement || 304 C.getRole() == NodeRole::ListDelimiter); 305 if (C.getRole() == NodeRole::ListDelimiter) { 306 assert(isa<Leaf>(C)); 307 assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); 308 } 309 } 310 311 #endif 312 } 313 314 void syntax::Node::assertInvariantsRecursive() const { 315 #ifndef NDEBUG 316 traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); }); 317 #endif 318 } 319 320 const syntax::Leaf *syntax::Tree::findFirstLeaf() const { 321 for (const Node &C : getChildren()) { 322 if (const auto *L = dyn_cast<syntax::Leaf>(&C)) 323 return L; 324 if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf()) 325 return L; 326 } 327 return nullptr; 328 } 329 330 const syntax::Leaf *syntax::Tree::findLastLeaf() const { 331 for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) { 332 if (const auto *L = dyn_cast<syntax::Leaf>(C)) 333 return L; 334 if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf()) 335 return L; 336 } 337 return nullptr; 338 } 339 340 const syntax::Node *syntax::Tree::findChild(NodeRole R) const { 341 for (const Node &C : getChildren()) { 342 if (C.getRole() == R) 343 return &C; 344 } 345 return nullptr; 346 } 347 348 bool syntax::List::classof(const syntax::Node *N) { 349 switch (N->getKind()) { 350 case syntax::NodeKind::NestedNameSpecifier: 351 case syntax::NodeKind::CallArguments: 352 case syntax::NodeKind::ParameterDeclarationList: 353 case syntax::NodeKind::DeclaratorList: 354 return true; 355 default: 356 return false; 357 } 358 } 359 360 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>> 361 syntax::List::getElementsAsNodesAndDelimiters() { 362 if (!getFirstChild()) 363 return {}; 364 365 std::vector<syntax::List::ElementAndDelimiter<Node>> Children; 366 syntax::Node *ElementWithoutDelimiter = nullptr; 367 for (Node &C : getChildren()) { 368 switch (C.getRole()) { 369 case syntax::NodeRole::ListElement: { 370 if (ElementWithoutDelimiter) { 371 Children.push_back({ElementWithoutDelimiter, nullptr}); 372 } 373 ElementWithoutDelimiter = &C; 374 break; 375 } 376 case syntax::NodeRole::ListDelimiter: { 377 Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)}); 378 ElementWithoutDelimiter = nullptr; 379 break; 380 } 381 default: 382 llvm_unreachable( 383 "A list can have only elements and delimiters as children."); 384 } 385 } 386 387 switch (getTerminationKind()) { 388 case syntax::List::TerminationKind::Separated: { 389 Children.push_back({ElementWithoutDelimiter, nullptr}); 390 break; 391 } 392 case syntax::List::TerminationKind::Terminated: 393 case syntax::List::TerminationKind::MaybeTerminated: { 394 if (ElementWithoutDelimiter) { 395 Children.push_back({ElementWithoutDelimiter, nullptr}); 396 } 397 break; 398 } 399 } 400 401 return Children; 402 } 403 404 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but 405 // ignoring delimiters 406 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() { 407 if (!getFirstChild()) 408 return {}; 409 410 std::vector<syntax::Node *> Children; 411 syntax::Node *ElementWithoutDelimiter = nullptr; 412 for (Node &C : getChildren()) { 413 switch (C.getRole()) { 414 case syntax::NodeRole::ListElement: { 415 if (ElementWithoutDelimiter) { 416 Children.push_back(ElementWithoutDelimiter); 417 } 418 ElementWithoutDelimiter = &C; 419 break; 420 } 421 case syntax::NodeRole::ListDelimiter: { 422 Children.push_back(ElementWithoutDelimiter); 423 ElementWithoutDelimiter = nullptr; 424 break; 425 } 426 default: 427 llvm_unreachable("A list has only elements or delimiters."); 428 } 429 } 430 431 switch (getTerminationKind()) { 432 case syntax::List::TerminationKind::Separated: { 433 Children.push_back(ElementWithoutDelimiter); 434 break; 435 } 436 case syntax::List::TerminationKind::Terminated: 437 case syntax::List::TerminationKind::MaybeTerminated: { 438 if (ElementWithoutDelimiter) { 439 Children.push_back(ElementWithoutDelimiter); 440 } 441 break; 442 } 443 } 444 445 return Children; 446 } 447 448 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const { 449 switch (this->getKind()) { 450 case NodeKind::NestedNameSpecifier: 451 return clang::tok::coloncolon; 452 case NodeKind::CallArguments: 453 case NodeKind::ParameterDeclarationList: 454 case NodeKind::DeclaratorList: 455 return clang::tok::comma; 456 default: 457 llvm_unreachable("This is not a subclass of List, thus " 458 "getDelimiterTokenKind() cannot be called"); 459 } 460 } 461 462 syntax::List::TerminationKind syntax::List::getTerminationKind() const { 463 switch (this->getKind()) { 464 case NodeKind::NestedNameSpecifier: 465 return TerminationKind::Terminated; 466 case NodeKind::CallArguments: 467 case NodeKind::ParameterDeclarationList: 468 case NodeKind::DeclaratorList: 469 return TerminationKind::Separated; 470 default: 471 llvm_unreachable("This is not a subclass of List, thus " 472 "getTerminationKind() cannot be called"); 473 } 474 } 475 476 bool syntax::List::canBeEmpty() const { 477 switch (this->getKind()) { 478 case NodeKind::NestedNameSpecifier: 479 return false; 480 case NodeKind::CallArguments: 481 return true; 482 case NodeKind::ParameterDeclarationList: 483 return true; 484 case NodeKind::DeclaratorList: 485 return true; 486 default: 487 llvm_unreachable("This is not a subclass of List, thus canBeEmpty() " 488 "cannot be called"); 489 } 490 } 491