1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
23       traverse(C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40   return Tokens;
41 }
42 
43 std::pair<FileID, ArrayRef<syntax::Token>>
44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45   auto FID = SourceMgr.createFileID(std::move(Input));
46   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47   assert(It.second && "duplicate FileID");
48   return {FID, It.first->second};
49 }
50 
51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52   assert(Tok != nullptr);
53 }
54 
55 bool syntax::Leaf::classof(const Node *N) {
56   return N->getKind() == NodeKind::Leaf;
57 }
58 
59 syntax::Node::Node(NodeKind Kind)
60     : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
61       Role(0), Original(false), CanModify(false) {
62   this->setRole(NodeRole::Detached);
63 }
64 
65 bool syntax::Node::isDetached() const {
66   return getRole() == NodeRole::Detached;
67 }
68 
69 void syntax::Node::setRole(NodeRole NR) {
70   this->Role = static_cast<unsigned>(NR);
71 }
72 
73 bool syntax::Tree::classof(const Node *N) {
74   return N->getKind() > NodeKind::Leaf;
75 }
76 
77 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
78   assert(Child->getRole() == NodeRole::Detached);
79   assert(Role != NodeRole::Detached);
80 
81   Child->setRole(Role);
82   prependChildLowLevel(Child);
83 }
84 
85 void syntax::Tree::prependChildLowLevel(Node *Child) {
86   assert(Child->Parent == nullptr);
87   assert(Child->NextSibling == nullptr);
88   assert(Child->getRole() != NodeRole::Detached);
89 
90   Child->Parent = this;
91   Child->NextSibling = this->FirstChild;
92   this->FirstChild = Child;
93 }
94 
95 void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
96                                              Node *New) {
97   assert(!BeforeBegin || BeforeBegin->Parent == this);
98 
99 #ifndef NDEBUG
100   for (auto *N = New; N; N = N->getNextSibling()) {
101     assert(N->Parent == nullptr);
102     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
103     // FIXME: sanity-check the role.
104   }
105 #endif
106 
107   // Detach old nodes.
108   for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->getNextSibling();
109        N != End;) {
110     auto *Next = N->NextSibling;
111 
112     N->setRole(NodeRole::Detached);
113     N->Parent = nullptr;
114     N->NextSibling = nullptr;
115     if (N->Original)
116       traverse(N, [&](Node *C) { C->Original = false; });
117 
118     N = Next;
119   }
120 
121   // Attach new nodes.
122   if (BeforeBegin)
123     BeforeBegin->NextSibling = New ? New : End;
124   else
125     FirstChild = New ? New : End;
126 
127   if (New) {
128     auto *Last = New;
129     for (auto *N = New; N != nullptr; N = N->getNextSibling()) {
130       Last = N;
131       N->Parent = this;
132     }
133     Last->NextSibling = End;
134   }
135 
136   // Mark the node as modified.
137   for (auto *T = this; T && T->Original; T = T->Parent)
138     T->Original = false;
139 }
140 
141 namespace {
142 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
143                      const SourceManager &SM) {
144   assert(L);
145   const auto *Token = L->getToken();
146   assert(Token);
147   // Handle 'eof' separately, calling text() on it produces an empty string.
148   if (Token->kind() == tok::eof)
149     OS << "<eof>";
150   else
151     OS << Token->text(SM);
152 }
153 
154 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
155                      const SourceManager &SM, std::vector<bool> IndentMask) {
156   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
157     if (N->getRole() != syntax::NodeRole::Unknown)
158       OS << " " << N->getRole();
159     if (!N->isOriginal())
160       OS << " synthesized";
161     if (!N->canModify())
162       OS << " unmodifiable";
163   };
164 
165   assert(N);
166   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
167     OS << "'";
168     dumpLeaf(OS, L, SM);
169     OS << "'";
170     DumpExtraInfo(N);
171     OS << "\n";
172     return;
173   }
174 
175   const auto *T = cast<syntax::Tree>(N);
176   OS << T->getKind();
177   DumpExtraInfo(N);
178   OS << "\n";
179 
180   for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
181     for (bool Filled : IndentMask) {
182       if (Filled)
183         OS << "| ";
184       else
185         OS << "  ";
186     }
187     if (!It->getNextSibling()) {
188       OS << "`-";
189       IndentMask.push_back(false);
190     } else {
191       OS << "|-";
192       IndentMask.push_back(true);
193     }
194     dumpNode(OS, It, SM, IndentMask);
195     IndentMask.pop_back();
196   }
197 }
198 } // namespace
199 
200 std::string syntax::Node::dump(const SourceManager &SM) const {
201   std::string Str;
202   llvm::raw_string_ostream OS(Str);
203   dumpNode(OS, this, SM, /*IndentMask=*/{});
204   return std::move(OS.str());
205 }
206 
207 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
208   std::string Storage;
209   llvm::raw_string_ostream OS(Storage);
210   traverse(this, [&](const syntax::Node *N) {
211     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
212       dumpLeaf(OS, L, SM);
213       OS << " ";
214     }
215   });
216   return OS.str();
217 }
218 
219 void syntax::Node::assertInvariants() const {
220 #ifndef NDEBUG
221   if (isDetached())
222     assert(getParent() == nullptr);
223   else
224     assert(getParent() != nullptr);
225 
226   const auto *T = dyn_cast<Tree>(this);
227   if (!T)
228     return;
229   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
230     if (T->isOriginal())
231       assert(C->isOriginal());
232     assert(!C->isDetached());
233     assert(C->getParent() == T);
234   }
235 
236   const auto *L = dyn_cast<List>(T);
237   if (!L)
238     return;
239   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
240     assert(C->getRole() == NodeRole::ListElement ||
241            C->getRole() == NodeRole::ListDelimiter);
242     if (C->getRole() == NodeRole::ListDelimiter) {
243       assert(isa<Leaf>(C));
244       assert(cast<Leaf>(C)->getToken()->kind() == L->getDelimiterTokenKind());
245     }
246   }
247 
248 #endif
249 }
250 
251 void syntax::Node::assertInvariantsRecursive() const {
252 #ifndef NDEBUG
253   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
254 #endif
255 }
256 
257 syntax::Leaf *syntax::Tree::findFirstLeaf() {
258   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
259     if (auto *L = dyn_cast<syntax::Leaf>(C))
260       return L;
261     if (auto *L = cast<syntax::Tree>(C)->findFirstLeaf())
262       return L;
263   }
264   return nullptr;
265 }
266 
267 syntax::Leaf *syntax::Tree::findLastLeaf() {
268   syntax::Leaf *Last = nullptr;
269   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
270     if (auto *L = dyn_cast<syntax::Leaf>(C))
271       Last = L;
272     else if (auto *L = cast<syntax::Tree>(C)->findLastLeaf())
273       Last = L;
274   }
275   return Last;
276 }
277 
278 syntax::Node *syntax::Tree::findChild(NodeRole R) {
279   for (auto *C = FirstChild; C; C = C->getNextSibling()) {
280     if (C->getRole() == R)
281       return C;
282   }
283   return nullptr;
284 }
285 
286 bool syntax::List::classof(const syntax::Node *N) {
287   switch (N->getKind()) {
288   case syntax::NodeKind::NestedNameSpecifier:
289   case syntax::NodeKind::CallArguments:
290   case syntax::NodeKind::ParameterDeclarationList:
291     return true;
292   default:
293     return false;
294   }
295 }
296 
297 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
298 syntax::List::getElementsAsNodesAndDelimiters() {
299   if (!getFirstChild())
300     return {};
301 
302   std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
303   syntax::Node *ElementWithoutDelimiter = nullptr;
304   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
305     switch (C->getRole()) {
306     case syntax::NodeRole::ListElement: {
307       if (ElementWithoutDelimiter) {
308         Children.push_back({ElementWithoutDelimiter, nullptr});
309       }
310       ElementWithoutDelimiter = C;
311       break;
312     }
313     case syntax::NodeRole::ListDelimiter: {
314       Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(C)});
315       ElementWithoutDelimiter = nullptr;
316       break;
317     }
318     default:
319       llvm_unreachable(
320           "A list can have only elements and delimiters as children.");
321     }
322   }
323 
324   switch (getTerminationKind()) {
325   case syntax::List::TerminationKind::Separated: {
326     Children.push_back({ElementWithoutDelimiter, nullptr});
327     break;
328   }
329   case syntax::List::TerminationKind::Terminated:
330   case syntax::List::TerminationKind::MaybeTerminated: {
331     if (ElementWithoutDelimiter) {
332       Children.push_back({ElementWithoutDelimiter, nullptr});
333     }
334     break;
335   }
336   }
337 
338   return Children;
339 }
340 
341 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
342 // ignoring delimiters
343 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
344   if (!getFirstChild())
345     return {};
346 
347   std::vector<syntax::Node *> Children;
348   syntax::Node *ElementWithoutDelimiter = nullptr;
349   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
350     switch (C->getRole()) {
351     case syntax::NodeRole::ListElement: {
352       if (ElementWithoutDelimiter) {
353         Children.push_back(ElementWithoutDelimiter);
354       }
355       ElementWithoutDelimiter = C;
356       break;
357     }
358     case syntax::NodeRole::ListDelimiter: {
359       Children.push_back(ElementWithoutDelimiter);
360       ElementWithoutDelimiter = nullptr;
361       break;
362     }
363     default:
364       llvm_unreachable("A list has only elements or delimiters.");
365     }
366   }
367 
368   switch (getTerminationKind()) {
369   case syntax::List::TerminationKind::Separated: {
370     Children.push_back(ElementWithoutDelimiter);
371     break;
372   }
373   case syntax::List::TerminationKind::Terminated:
374   case syntax::List::TerminationKind::MaybeTerminated: {
375     if (ElementWithoutDelimiter) {
376       Children.push_back(ElementWithoutDelimiter);
377     }
378     break;
379   }
380   }
381 
382   return Children;
383 }
384 
385 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
386   switch (this->getKind()) {
387   case NodeKind::NestedNameSpecifier:
388     return clang::tok::coloncolon;
389   case NodeKind::CallArguments:
390   case NodeKind::ParameterDeclarationList:
391     return clang::tok::comma;
392   default:
393     llvm_unreachable("This is not a subclass of List, thus "
394                      "getDelimiterTokenKind() cannot be called");
395   }
396 }
397 
398 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
399   switch (this->getKind()) {
400   case NodeKind::NestedNameSpecifier:
401     return TerminationKind::Terminated;
402   case NodeKind::CallArguments:
403   case NodeKind::ParameterDeclarationList:
404     return TerminationKind::Separated;
405   default:
406     llvm_unreachable("This is not a subclass of List, thus "
407                      "getTerminationKind() cannot be called");
408   }
409 }
410 
411 bool syntax::List::canBeEmpty() const {
412   switch (this->getKind()) {
413   case NodeKind::NestedNameSpecifier:
414     return false;
415   case NodeKind::CallArguments:
416     return true;
417   case NodeKind::ParameterDeclarationList:
418     return true;
419   default:
420     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
421                      "cannot be called");
422   }
423 }
424