1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (const syntax::Node &C : T->getChildren())
23       traverse(&C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40   return Tokens;
41 }
42 
43 std::pair<FileID, ArrayRef<syntax::Token>>
44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45   auto FID = SourceMgr.createFileID(std::move(Input));
46   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47   assert(It.second && "duplicate FileID");
48   return {FID, It.first->second};
49 }
50 
51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52   assert(Tok != nullptr);
53 }
54 
55 bool syntax::Leaf::classof(const Node *N) {
56   return N->getKind() == NodeKind::Leaf;
57 }
58 
59 syntax::Node::Node(NodeKind Kind)
60     : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
61       Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
62       CanModify(false) {
63   this->setRole(NodeRole::Detached);
64 }
65 
66 bool syntax::Node::isDetached() const {
67   return getRole() == NodeRole::Detached;
68 }
69 
70 void syntax::Node::setRole(NodeRole NR) {
71   this->Role = static_cast<unsigned>(NR);
72 }
73 
74 bool syntax::Tree::classof(const Node *N) {
75   return N->getKind() > NodeKind::Leaf;
76 }
77 
78 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
79   assert(Child->getRole() == NodeRole::Detached);
80   assert(Role != NodeRole::Detached);
81 
82   Child->setRole(Role);
83   appendChildLowLevel(Child);
84 }
85 
86 void syntax::Tree::appendChildLowLevel(Node *Child) {
87   assert(Child->Parent == nullptr);
88   assert(Child->NextSibling == nullptr);
89   assert(Child->PreviousSibling == nullptr);
90   assert(Child->getRole() != NodeRole::Detached);
91 
92   Child->Parent = this;
93   if (this->LastChild) {
94     Child->PreviousSibling = this->LastChild;
95     this->LastChild->NextSibling = Child;
96   } else
97     this->FirstChild = Child;
98 
99   this->LastChild = Child;
100 }
101 
102 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
103   assert(Child->getRole() == NodeRole::Detached);
104   assert(Role != NodeRole::Detached);
105 
106   Child->setRole(Role);
107   prependChildLowLevel(Child);
108 }
109 
110 void syntax::Tree::prependChildLowLevel(Node *Child) {
111   assert(Child->Parent == nullptr);
112   assert(Child->NextSibling == nullptr);
113   assert(Child->PreviousSibling == nullptr);
114   assert(Child->getRole() != NodeRole::Detached);
115 
116   Child->Parent = this;
117   if (this->FirstChild) {
118     Child->NextSibling = this->FirstChild;
119     this->FirstChild->PreviousSibling = Child;
120   } else
121     this->LastChild = Child;
122 
123   this->FirstChild = Child;
124 }
125 
126 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
127                                              Node *New) {
128   assert((!Begin || Begin->Parent == this) &&
129          "`Begin` is not a child of `this`.");
130   assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
131   assert(canModify() && "Cannot modify `this`.");
132 
133 #ifndef NDEBUG
134   for (auto *N = New; N; N = N->NextSibling) {
135     assert(N->Parent == nullptr);
136     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
137     // FIXME: sanity-check the role.
138   }
139 
140   auto Reachable = [](Node *From, Node *N) {
141     if (!N)
142       return true;
143     for (auto *It = From; It; It = It->NextSibling)
144       if (It == N)
145         return true;
146     return false;
147   };
148   assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
149   assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
150 #endif
151 
152   if (!New && Begin == End)
153     return;
154 
155   // Mark modification.
156   for (auto *T = this; T && T->Original; T = T->Parent)
157     T->Original = false;
158 
159   // Save the node before the range to be removed. Later we insert the `New`
160   // range after this node.
161   auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
162 
163   // Detach old nodes.
164   for (auto *N = Begin; N != End;) {
165     auto *Next = N->NextSibling;
166 
167     N->setRole(NodeRole::Detached);
168     N->Parent = nullptr;
169     N->NextSibling = nullptr;
170     N->PreviousSibling = nullptr;
171     if (N->Original)
172       traverse(N, [](Node *C) { C->Original = false; });
173 
174     N = Next;
175   }
176 
177   // Attach new range.
178   auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
179   auto *&NewLast = End ? End->PreviousSibling : LastChild;
180 
181   if (!New) {
182     NewFirst = End;
183     NewLast = BeforeBegin;
184     return;
185   }
186 
187   New->PreviousSibling = BeforeBegin;
188   NewFirst = New;
189 
190   Node *LastInNew;
191   for (auto *N = New; N != nullptr; N = N->NextSibling) {
192     LastInNew = N;
193     N->Parent = this;
194   }
195   LastInNew->NextSibling = End;
196   NewLast = LastInNew;
197 }
198 
199 namespace {
200 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
201                      const SourceManager &SM) {
202   assert(L);
203   const auto *Token = L->getToken();
204   assert(Token);
205   // Handle 'eof' separately, calling text() on it produces an empty string.
206   if (Token->kind() == tok::eof)
207     OS << "<eof>";
208   else
209     OS << Token->text(SM);
210 }
211 
212 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
213                      const SourceManager &SM, std::vector<bool> IndentMask) {
214   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
215     if (N->getRole() != syntax::NodeRole::Unknown)
216       OS << " " << N->getRole();
217     if (!N->isOriginal())
218       OS << " synthesized";
219     if (!N->canModify())
220       OS << " unmodifiable";
221   };
222 
223   assert(N);
224   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
225     OS << "'";
226     dumpLeaf(OS, L, SM);
227     OS << "'";
228     DumpExtraInfo(N);
229     OS << "\n";
230     return;
231   }
232 
233   const auto *T = cast<syntax::Tree>(N);
234   OS << T->getKind();
235   DumpExtraInfo(N);
236   OS << "\n";
237 
238   for (const syntax::Node &It : T->getChildren()) {
239     for (bool Filled : IndentMask) {
240       if (Filled)
241         OS << "| ";
242       else
243         OS << "  ";
244     }
245     if (!It.getNextSibling()) {
246       OS << "`-";
247       IndentMask.push_back(false);
248     } else {
249       OS << "|-";
250       IndentMask.push_back(true);
251     }
252     dumpNode(OS, &It, SM, IndentMask);
253     IndentMask.pop_back();
254   }
255 }
256 } // namespace
257 
258 std::string syntax::Node::dump(const SourceManager &SM) const {
259   std::string Str;
260   llvm::raw_string_ostream OS(Str);
261   dumpNode(OS, this, SM, /*IndentMask=*/{});
262   return std::move(OS.str());
263 }
264 
265 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
266   std::string Storage;
267   llvm::raw_string_ostream OS(Storage);
268   traverse(this, [&](const syntax::Node *N) {
269     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
270       dumpLeaf(OS, L, SM);
271       OS << " ";
272     }
273   });
274   return OS.str();
275 }
276 
277 void syntax::Node::assertInvariants() const {
278 #ifndef NDEBUG
279   if (isDetached())
280     assert(getParent() == nullptr);
281   else
282     assert(getParent() != nullptr);
283 
284   const auto *T = dyn_cast<Tree>(this);
285   if (!T)
286     return;
287   for (const Node &C : T->getChildren()) {
288     if (T->isOriginal())
289       assert(C.isOriginal());
290     assert(!C.isDetached());
291     assert(C.getParent() == T);
292     const auto *Next = C.getNextSibling();
293     assert(!Next || &C == Next->getPreviousSibling());
294     if (!C.getNextSibling())
295       assert(&C == T->getLastChild() &&
296              "Last child is reachable by advancing from the first child.");
297   }
298 
299   const auto *L = dyn_cast<List>(T);
300   if (!L)
301     return;
302   for (const Node &C : T->getChildren()) {
303     assert(C.getRole() == NodeRole::ListElement ||
304            C.getRole() == NodeRole::ListDelimiter);
305     if (C.getRole() == NodeRole::ListDelimiter) {
306       assert(isa<Leaf>(C));
307       assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
308     }
309   }
310 
311 #endif
312 }
313 
314 void syntax::Node::assertInvariantsRecursive() const {
315 #ifndef NDEBUG
316   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
317 #endif
318 }
319 
320 const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
321   for (const Node &C : getChildren()) {
322     if (const auto *L = dyn_cast<syntax::Leaf>(&C))
323       return L;
324     if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
325       return L;
326   }
327   return nullptr;
328 }
329 
330 const syntax::Leaf *syntax::Tree::findLastLeaf() const {
331   for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
332     if (const auto *L = dyn_cast<syntax::Leaf>(C))
333       return L;
334     if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
335       return L;
336   }
337   return nullptr;
338 }
339 
340 const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
341   for (const Node &C : getChildren()) {
342     if (C.getRole() == R)
343       return &C;
344   }
345   return nullptr;
346 }
347 
348 bool syntax::List::classof(const syntax::Node *N) {
349   switch (N->getKind()) {
350   case syntax::NodeKind::NestedNameSpecifier:
351   case syntax::NodeKind::CallArguments:
352   case syntax::NodeKind::ParameterDeclarationList:
353   case syntax::NodeKind::DeclaratorList:
354     return true;
355   default:
356     return false;
357   }
358 }
359 
360 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
361 syntax::List::getElementsAsNodesAndDelimiters() {
362   if (!getFirstChild())
363     return {};
364 
365   std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
366   syntax::Node *ElementWithoutDelimiter = nullptr;
367   for (Node &C : getChildren()) {
368     switch (C.getRole()) {
369     case syntax::NodeRole::ListElement: {
370       if (ElementWithoutDelimiter) {
371         Children.push_back({ElementWithoutDelimiter, nullptr});
372       }
373       ElementWithoutDelimiter = &C;
374       break;
375     }
376     case syntax::NodeRole::ListDelimiter: {
377       Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
378       ElementWithoutDelimiter = nullptr;
379       break;
380     }
381     default:
382       llvm_unreachable(
383           "A list can have only elements and delimiters as children.");
384     }
385   }
386 
387   switch (getTerminationKind()) {
388   case syntax::List::TerminationKind::Separated: {
389     Children.push_back({ElementWithoutDelimiter, nullptr});
390     break;
391   }
392   case syntax::List::TerminationKind::Terminated:
393   case syntax::List::TerminationKind::MaybeTerminated: {
394     if (ElementWithoutDelimiter) {
395       Children.push_back({ElementWithoutDelimiter, nullptr});
396     }
397     break;
398   }
399   }
400 
401   return Children;
402 }
403 
404 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
405 // ignoring delimiters
406 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
407   if (!getFirstChild())
408     return {};
409 
410   std::vector<syntax::Node *> Children;
411   syntax::Node *ElementWithoutDelimiter = nullptr;
412   for (Node &C : getChildren()) {
413     switch (C.getRole()) {
414     case syntax::NodeRole::ListElement: {
415       if (ElementWithoutDelimiter) {
416         Children.push_back(ElementWithoutDelimiter);
417       }
418       ElementWithoutDelimiter = &C;
419       break;
420     }
421     case syntax::NodeRole::ListDelimiter: {
422       Children.push_back(ElementWithoutDelimiter);
423       ElementWithoutDelimiter = nullptr;
424       break;
425     }
426     default:
427       llvm_unreachable("A list has only elements or delimiters.");
428     }
429   }
430 
431   switch (getTerminationKind()) {
432   case syntax::List::TerminationKind::Separated: {
433     Children.push_back(ElementWithoutDelimiter);
434     break;
435   }
436   case syntax::List::TerminationKind::Terminated:
437   case syntax::List::TerminationKind::MaybeTerminated: {
438     if (ElementWithoutDelimiter) {
439       Children.push_back(ElementWithoutDelimiter);
440     }
441     break;
442   }
443   }
444 
445   return Children;
446 }
447 
448 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
449   switch (this->getKind()) {
450   case NodeKind::NestedNameSpecifier:
451     return clang::tok::coloncolon;
452   case NodeKind::CallArguments:
453   case NodeKind::ParameterDeclarationList:
454   case NodeKind::DeclaratorList:
455     return clang::tok::comma;
456   default:
457     llvm_unreachable("This is not a subclass of List, thus "
458                      "getDelimiterTokenKind() cannot be called");
459   }
460 }
461 
462 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
463   switch (this->getKind()) {
464   case NodeKind::NestedNameSpecifier:
465     return TerminationKind::Terminated;
466   case NodeKind::CallArguments:
467   case NodeKind::ParameterDeclarationList:
468   case NodeKind::DeclaratorList:
469     return TerminationKind::Separated;
470   default:
471     llvm_unreachable("This is not a subclass of List, thus "
472                      "getTerminationKind() cannot be called");
473   }
474 }
475 
476 bool syntax::List::canBeEmpty() const {
477   switch (this->getKind()) {
478   case NodeKind::NestedNameSpecifier:
479     return false;
480   case NodeKind::CallArguments:
481     return true;
482   case NodeKind::ParameterDeclarationList:
483     return true;
484   case NodeKind::DeclaratorList:
485     return true;
486   default:
487     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
488                      "cannot be called");
489   }
490 }
491