1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/Tree.h" 9 #include "clang/Basic/TokenKinds.h" 10 #include "clang/Tooling/Syntax/Nodes.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/Support/Casting.h" 14 #include <cassert> 15 16 using namespace clang; 17 18 namespace { 19 static void traverse(const syntax::Node *N, 20 llvm::function_ref<void(const syntax::Node *)> Visit) { 21 if (auto *T = dyn_cast<syntax::Tree>(N)) { 22 for (const syntax::Node &C : T->getChildren()) 23 traverse(&C, Visit); 24 } 25 Visit(N); 26 } 27 static void traverse(syntax::Node *N, 28 llvm::function_ref<void(syntax::Node *)> Visit) { 29 traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) { 30 Visit(const_cast<syntax::Node *>(N)); 31 }); 32 } 33 } // namespace 34 35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, 36 const TokenBuffer &Tokens) 37 : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} 38 39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { 40 return Tokens; 41 } 42 43 std::pair<FileID, ArrayRef<syntax::Token>> 44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) { 45 auto FID = SourceMgr.createFileID(std::move(Input)); 46 auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); 47 assert(It.second && "duplicate FileID"); 48 return {FID, It.first->second}; 49 } 50 51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { 52 assert(Tok != nullptr); 53 } 54 55 bool syntax::Leaf::classof(const Node *N) { 56 return N->getKind() == NodeKind::Leaf; 57 } 58 59 syntax::Node::Node(NodeKind Kind) 60 : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)), 61 Role(0), Original(false), CanModify(false) { 62 this->setRole(NodeRole::Detached); 63 } 64 65 bool syntax::Node::isDetached() const { 66 return getRole() == NodeRole::Detached; 67 } 68 69 void syntax::Node::setRole(NodeRole NR) { 70 this->Role = static_cast<unsigned>(NR); 71 } 72 73 bool syntax::Tree::classof(const Node *N) { 74 return N->getKind() > NodeKind::Leaf; 75 } 76 77 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { 78 assert(Child->getRole() == NodeRole::Detached); 79 assert(Role != NodeRole::Detached); 80 81 Child->setRole(Role); 82 prependChildLowLevel(Child); 83 } 84 85 void syntax::Tree::prependChildLowLevel(Node *Child) { 86 assert(Child->Parent == nullptr); 87 assert(Child->NextSibling == nullptr); 88 assert(Child->getRole() != NodeRole::Detached); 89 90 Child->Parent = this; 91 Child->NextSibling = this->FirstChild; 92 this->FirstChild = Child; 93 } 94 95 void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, 96 Node *New) { 97 assert((!BeforeBegin || BeforeBegin->Parent == this) && 98 "`BeforeBegin` is not a child of `this`."); 99 assert((!End || End->Parent == this) && "`End` is not a child of `this`."); 100 assert(canModify() && "Cannot modify `this`."); 101 102 Node *&Begin = BeforeBegin ? BeforeBegin->NextSibling : FirstChild; 103 104 #ifndef NDEBUG 105 for (auto *N = New; N; N = N->NextSibling) { 106 assert(N->Parent == nullptr); 107 assert(N->getRole() != NodeRole::Detached && "Roles must be set"); 108 // FIXME: sanity-check the role. 109 } 110 111 auto Reachable = [](Node *From, Node *N) { 112 if (!N) 113 return true; 114 for (auto *It = From; It; It = It->NextSibling) 115 if (It == N) 116 return true; 117 return false; 118 }; 119 assert(Reachable(FirstChild, BeforeBegin) && 120 "`BeforeBegin` is not reachable."); 121 assert(Reachable(Begin, End) && "`End` is not after `BeforeBegin`."); 122 #endif 123 124 if (!New && Begin == End) 125 return; 126 127 // Mark modification. 128 for (auto *T = this; T && T->Original; T = T->Parent) 129 T->Original = false; 130 131 // Detach old nodes. 132 for (auto *N = Begin; N != End;) { 133 auto *Next = N->NextSibling; 134 135 N->setRole(NodeRole::Detached); 136 N->Parent = nullptr; 137 N->NextSibling = nullptr; 138 if (N->Original) 139 traverse(N, [](Node *C) { C->Original = false; }); 140 141 N = Next; 142 } 143 144 if (!New) { 145 Begin = End; 146 return; 147 } 148 // Attach new nodes. 149 Begin = New; 150 auto *Last = New; 151 for (auto *N = New; N != nullptr; N = N->NextSibling) { 152 Last = N; 153 N->Parent = this; 154 } 155 Last->NextSibling = End; 156 } 157 158 namespace { 159 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, 160 const SourceManager &SM) { 161 assert(L); 162 const auto *Token = L->getToken(); 163 assert(Token); 164 // Handle 'eof' separately, calling text() on it produces an empty string. 165 if (Token->kind() == tok::eof) 166 OS << "<eof>"; 167 else 168 OS << Token->text(SM); 169 } 170 171 static void dumpNode(raw_ostream &OS, const syntax::Node *N, 172 const SourceManager &SM, std::vector<bool> IndentMask) { 173 auto DumpExtraInfo = [&OS](const syntax::Node *N) { 174 if (N->getRole() != syntax::NodeRole::Unknown) 175 OS << " " << N->getRole(); 176 if (!N->isOriginal()) 177 OS << " synthesized"; 178 if (!N->canModify()) 179 OS << " unmodifiable"; 180 }; 181 182 assert(N); 183 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 184 OS << "'"; 185 dumpLeaf(OS, L, SM); 186 OS << "'"; 187 DumpExtraInfo(N); 188 OS << "\n"; 189 return; 190 } 191 192 const auto *T = cast<syntax::Tree>(N); 193 OS << T->getKind(); 194 DumpExtraInfo(N); 195 OS << "\n"; 196 197 for (const syntax::Node &It : T->getChildren()) { 198 for (bool Filled : IndentMask) { 199 if (Filled) 200 OS << "| "; 201 else 202 OS << " "; 203 } 204 if (!It.getNextSibling()) { 205 OS << "`-"; 206 IndentMask.push_back(false); 207 } else { 208 OS << "|-"; 209 IndentMask.push_back(true); 210 } 211 dumpNode(OS, &It, SM, IndentMask); 212 IndentMask.pop_back(); 213 } 214 } 215 } // namespace 216 217 std::string syntax::Node::dump(const SourceManager &SM) const { 218 std::string Str; 219 llvm::raw_string_ostream OS(Str); 220 dumpNode(OS, this, SM, /*IndentMask=*/{}); 221 return std::move(OS.str()); 222 } 223 224 std::string syntax::Node::dumpTokens(const SourceManager &SM) const { 225 std::string Storage; 226 llvm::raw_string_ostream OS(Storage); 227 traverse(this, [&](const syntax::Node *N) { 228 if (const auto *L = dyn_cast<syntax::Leaf>(N)) { 229 dumpLeaf(OS, L, SM); 230 OS << " "; 231 } 232 }); 233 return OS.str(); 234 } 235 236 void syntax::Node::assertInvariants() const { 237 #ifndef NDEBUG 238 if (isDetached()) 239 assert(getParent() == nullptr); 240 else 241 assert(getParent() != nullptr); 242 243 const auto *T = dyn_cast<Tree>(this); 244 if (!T) 245 return; 246 for (const Node &C : T->getChildren()) { 247 if (T->isOriginal()) 248 assert(C.isOriginal()); 249 assert(!C.isDetached()); 250 assert(C.getParent() == T); 251 } 252 253 const auto *L = dyn_cast<List>(T); 254 if (!L) 255 return; 256 for (const Node &C : T->getChildren()) { 257 assert(C.getRole() == NodeRole::ListElement || 258 C.getRole() == NodeRole::ListDelimiter); 259 if (C.getRole() == NodeRole::ListDelimiter) { 260 assert(isa<Leaf>(C)); 261 assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); 262 } 263 } 264 265 #endif 266 } 267 268 void syntax::Node::assertInvariantsRecursive() const { 269 #ifndef NDEBUG 270 traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); }); 271 #endif 272 } 273 274 const syntax::Leaf *syntax::Tree::findFirstLeaf() const { 275 for (const Node &C : getChildren()) { 276 if (const auto *L = dyn_cast<syntax::Leaf>(&C)) 277 return L; 278 if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf()) 279 return L; 280 } 281 return nullptr; 282 } 283 284 const syntax::Leaf *syntax::Tree::findLastLeaf() const { 285 const syntax::Leaf *Last = nullptr; 286 for (const Node &C : getChildren()) { 287 if (const auto *L = dyn_cast<syntax::Leaf>(&C)) 288 Last = L; 289 else if (const auto *L = cast<syntax::Tree>(C).findLastLeaf()) 290 Last = L; 291 } 292 return Last; 293 } 294 295 const syntax::Node *syntax::Tree::findChild(NodeRole R) const { 296 for (const Node &C : getChildren()) { 297 if (C.getRole() == R) 298 return &C; 299 } 300 return nullptr; 301 } 302 303 bool syntax::List::classof(const syntax::Node *N) { 304 switch (N->getKind()) { 305 case syntax::NodeKind::NestedNameSpecifier: 306 case syntax::NodeKind::CallArguments: 307 case syntax::NodeKind::ParameterDeclarationList: 308 return true; 309 default: 310 return false; 311 } 312 } 313 314 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>> 315 syntax::List::getElementsAsNodesAndDelimiters() { 316 if (!getFirstChild()) 317 return {}; 318 319 std::vector<syntax::List::ElementAndDelimiter<Node>> Children; 320 syntax::Node *ElementWithoutDelimiter = nullptr; 321 for (Node &C : getChildren()) { 322 switch (C.getRole()) { 323 case syntax::NodeRole::ListElement: { 324 if (ElementWithoutDelimiter) { 325 Children.push_back({ElementWithoutDelimiter, nullptr}); 326 } 327 ElementWithoutDelimiter = &C; 328 break; 329 } 330 case syntax::NodeRole::ListDelimiter: { 331 Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)}); 332 ElementWithoutDelimiter = nullptr; 333 break; 334 } 335 default: 336 llvm_unreachable( 337 "A list can have only elements and delimiters as children."); 338 } 339 } 340 341 switch (getTerminationKind()) { 342 case syntax::List::TerminationKind::Separated: { 343 Children.push_back({ElementWithoutDelimiter, nullptr}); 344 break; 345 } 346 case syntax::List::TerminationKind::Terminated: 347 case syntax::List::TerminationKind::MaybeTerminated: { 348 if (ElementWithoutDelimiter) { 349 Children.push_back({ElementWithoutDelimiter, nullptr}); 350 } 351 break; 352 } 353 } 354 355 return Children; 356 } 357 358 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but 359 // ignoring delimiters 360 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() { 361 if (!getFirstChild()) 362 return {}; 363 364 std::vector<syntax::Node *> Children; 365 syntax::Node *ElementWithoutDelimiter = nullptr; 366 for (Node &C : getChildren()) { 367 switch (C.getRole()) { 368 case syntax::NodeRole::ListElement: { 369 if (ElementWithoutDelimiter) { 370 Children.push_back(ElementWithoutDelimiter); 371 } 372 ElementWithoutDelimiter = &C; 373 break; 374 } 375 case syntax::NodeRole::ListDelimiter: { 376 Children.push_back(ElementWithoutDelimiter); 377 ElementWithoutDelimiter = nullptr; 378 break; 379 } 380 default: 381 llvm_unreachable("A list has only elements or delimiters."); 382 } 383 } 384 385 switch (getTerminationKind()) { 386 case syntax::List::TerminationKind::Separated: { 387 Children.push_back(ElementWithoutDelimiter); 388 break; 389 } 390 case syntax::List::TerminationKind::Terminated: 391 case syntax::List::TerminationKind::MaybeTerminated: { 392 if (ElementWithoutDelimiter) { 393 Children.push_back(ElementWithoutDelimiter); 394 } 395 break; 396 } 397 } 398 399 return Children; 400 } 401 402 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const { 403 switch (this->getKind()) { 404 case NodeKind::NestedNameSpecifier: 405 return clang::tok::coloncolon; 406 case NodeKind::CallArguments: 407 case NodeKind::ParameterDeclarationList: 408 return clang::tok::comma; 409 default: 410 llvm_unreachable("This is not a subclass of List, thus " 411 "getDelimiterTokenKind() cannot be called"); 412 } 413 } 414 415 syntax::List::TerminationKind syntax::List::getTerminationKind() const { 416 switch (this->getKind()) { 417 case NodeKind::NestedNameSpecifier: 418 return TerminationKind::Terminated; 419 case NodeKind::CallArguments: 420 case NodeKind::ParameterDeclarationList: 421 return TerminationKind::Separated; 422 default: 423 llvm_unreachable("This is not a subclass of List, thus " 424 "getTerminationKind() cannot be called"); 425 } 426 } 427 428 bool syntax::List::canBeEmpty() const { 429 switch (this->getKind()) { 430 case NodeKind::NestedNameSpecifier: 431 return false; 432 case NodeKind::CallArguments: 433 return true; 434 case NodeKind::ParameterDeclarationList: 435 return true; 436 default: 437 llvm_unreachable("This is not a subclass of List, thus canBeEmpty() " 438 "cannot be called"); 439 } 440 } 441