1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
23       traverse(C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40   return Tokens;
41 }
42 
43 std::pair<FileID, ArrayRef<syntax::Token>>
44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45   auto FID = SourceMgr.createFileID(std::move(Input));
46   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47   assert(It.second && "duplicate FileID");
48   return {FID, It.first->second};
49 }
50 
51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52   assert(Tok != nullptr);
53 }
54 
55 bool syntax::Leaf::classof(const Node *N) {
56   return N->getKind() == NodeKind::Leaf;
57 }
58 
59 syntax::Node::Node(NodeKind Kind)
60     : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
61       Role(0), Original(false), CanModify(false) {
62   this->setRole(NodeRole::Detached);
63 }
64 
65 bool syntax::Node::isDetached() const {
66   return getRole() == NodeRole::Detached;
67 }
68 
69 void syntax::Node::setRole(NodeRole NR) {
70   this->Role = static_cast<unsigned>(NR);
71 }
72 
73 bool syntax::Tree::classof(const Node *N) {
74   return N->getKind() > NodeKind::Leaf;
75 }
76 
77 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
78   assert(Child->getRole() == NodeRole::Detached);
79   assert(Role != NodeRole::Detached);
80 
81   Child->setRole(Role);
82   prependChildLowLevel(Child);
83 }
84 
85 void syntax::Tree::prependChildLowLevel(Node *Child) {
86   assert(Child->Parent == nullptr);
87   assert(Child->NextSibling == nullptr);
88   assert(Child->getRole() != NodeRole::Detached);
89 
90   Child->Parent = this;
91   Child->NextSibling = this->FirstChild;
92   this->FirstChild = Child;
93 }
94 
95 void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
96                                              Node *New) {
97   assert(!BeforeBegin || BeforeBegin->Parent == this);
98 
99 #ifndef NDEBUG
100   for (auto *N = New; N; N = N->getNextSibling()) {
101     assert(N->Parent == nullptr);
102     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
103     // FIXME: sanity-check the role.
104   }
105 #endif
106 
107   // Detach old nodes.
108   for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->getNextSibling();
109        N != End;) {
110     auto *Next = N->NextSibling;
111 
112     N->setRole(NodeRole::Detached);
113     N->Parent = nullptr;
114     N->NextSibling = nullptr;
115     if (N->Original)
116       traverse(N, [&](Node *C) { C->Original = false; });
117 
118     N = Next;
119   }
120 
121   // Attach new nodes.
122   if (BeforeBegin)
123     BeforeBegin->NextSibling = New ? New : End;
124   else
125     FirstChild = New ? New : End;
126 
127   if (New) {
128     auto *Last = New;
129     for (auto *N = New; N != nullptr; N = N->getNextSibling()) {
130       Last = N;
131       N->Parent = this;
132     }
133     Last->NextSibling = End;
134   }
135 
136   // Mark the node as modified.
137   for (auto *T = this; T && T->Original; T = T->Parent)
138     T->Original = false;
139 }
140 
141 namespace {
142 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
143                      const SourceManager &SM) {
144   assert(L);
145   const auto *Token = L->getToken();
146   assert(Token);
147   // Handle 'eof' separately, calling text() on it produces an empty string.
148   if (Token->kind() == tok::eof)
149     OS << "<eof>";
150   else
151     OS << Token->text(SM);
152 }
153 
154 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
155                      const SourceManager &SM, std::vector<bool> IndentMask) {
156   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
157     if (N->getRole() != syntax::NodeRole::Unknown)
158       OS << " " << N->getRole();
159     if (!N->isOriginal())
160       OS << " synthesized";
161     if (!N->canModify())
162       OS << " unmodifiable";
163   };
164 
165   assert(N);
166   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
167     OS << "'";
168     dumpLeaf(OS, L, SM);
169     OS << "'";
170     DumpExtraInfo(N);
171     OS << "\n";
172     return;
173   }
174 
175   const auto *T = cast<syntax::Tree>(N);
176   OS << T->getKind();
177   DumpExtraInfo(N);
178   OS << "\n";
179 
180   for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
181     for (bool Filled : IndentMask) {
182       if (Filled)
183         OS << "| ";
184       else
185         OS << "  ";
186     }
187     if (!It->getNextSibling()) {
188       OS << "`-";
189       IndentMask.push_back(false);
190     } else {
191       OS << "|-";
192       IndentMask.push_back(true);
193     }
194     dumpNode(OS, It, SM, IndentMask);
195     IndentMask.pop_back();
196   }
197 }
198 } // namespace
199 
200 std::string syntax::Node::dump(const SourceManager &SM) const {
201   std::string Str;
202   llvm::raw_string_ostream OS(Str);
203   dumpNode(OS, this, SM, /*IndentMask=*/{});
204   return std::move(OS.str());
205 }
206 
207 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
208   std::string Storage;
209   llvm::raw_string_ostream OS(Storage);
210   traverse(this, [&](const syntax::Node *N) {
211     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
212       dumpLeaf(OS, L, SM);
213       OS << " ";
214     }
215   });
216   return OS.str();
217 }
218 
219 void syntax::Node::assertInvariants() const {
220 #ifndef NDEBUG
221   if (isDetached())
222     assert(getParent() == nullptr);
223   else
224     assert(getParent() != nullptr);
225 
226   const auto *T = dyn_cast<Tree>(this);
227   if (!T)
228     return;
229   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
230     if (T->isOriginal())
231       assert(C->isOriginal());
232     assert(!C->isDetached());
233     assert(C->getParent() == T);
234   }
235 
236   const auto *L = dyn_cast<List>(T);
237   if (!L)
238     return;
239   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
240     assert(C->getRole() == NodeRole::ListElement ||
241            C->getRole() == NodeRole::ListDelimiter);
242     if (C->getRole() == NodeRole::ListDelimiter) {
243       assert(isa<Leaf>(C));
244       assert(cast<Leaf>(C)->getToken()->kind() == L->getDelimiterTokenKind());
245     }
246   }
247 
248 #endif
249 }
250 
251 void syntax::Node::assertInvariantsRecursive() const {
252 #ifndef NDEBUG
253   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
254 #endif
255 }
256 
257 syntax::Leaf *syntax::Tree::findFirstLeaf() {
258   auto *T = this;
259   while (auto *C = T->getFirstChild()) {
260     if (auto *L = dyn_cast<syntax::Leaf>(C))
261       return L;
262     T = cast<syntax::Tree>(C);
263   }
264   return nullptr;
265 }
266 
267 syntax::Leaf *syntax::Tree::findLastLeaf() {
268   auto *T = this;
269   while (auto *C = T->getFirstChild()) {
270     // Find the last child.
271     while (auto *Next = C->getNextSibling())
272       C = Next;
273 
274     if (auto *L = dyn_cast<syntax::Leaf>(C))
275       return L;
276     T = cast<syntax::Tree>(C);
277   }
278   return nullptr;
279 }
280 
281 syntax::Node *syntax::Tree::findChild(NodeRole R) {
282   for (auto *C = FirstChild; C; C = C->getNextSibling()) {
283     if (C->getRole() == R)
284       return C;
285   }
286   return nullptr;
287 }
288 
289 bool syntax::List::classof(const syntax::Node *N) {
290   switch (N->getKind()) {
291   case syntax::NodeKind::NestedNameSpecifier:
292   case syntax::NodeKind::CallArguments:
293   case syntax::NodeKind::ParameterDeclarationList:
294     return true;
295   default:
296     return false;
297   }
298 }
299 
300 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
301 syntax::List::getElementsAsNodesAndDelimiters() {
302   if (!getFirstChild())
303     return {};
304 
305   std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
306   syntax::Node *ElementWithoutDelimiter = nullptr;
307   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
308     switch (C->getRole()) {
309     case syntax::NodeRole::ListElement: {
310       if (ElementWithoutDelimiter) {
311         Children.push_back({ElementWithoutDelimiter, nullptr});
312       }
313       ElementWithoutDelimiter = C;
314       break;
315     }
316     case syntax::NodeRole::ListDelimiter: {
317       Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(C)});
318       ElementWithoutDelimiter = nullptr;
319       break;
320     }
321     default:
322       llvm_unreachable(
323           "A list can have only elements and delimiters as children.");
324     }
325   }
326 
327   switch (getTerminationKind()) {
328   case syntax::List::TerminationKind::Separated: {
329     Children.push_back({ElementWithoutDelimiter, nullptr});
330     break;
331   }
332   case syntax::List::TerminationKind::Terminated:
333   case syntax::List::TerminationKind::MaybeTerminated: {
334     if (ElementWithoutDelimiter) {
335       Children.push_back({ElementWithoutDelimiter, nullptr});
336     }
337     break;
338   }
339   }
340 
341   return Children;
342 }
343 
344 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
345 // ignoring delimiters
346 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
347   if (!getFirstChild())
348     return {};
349 
350   std::vector<syntax::Node *> Children;
351   syntax::Node *ElementWithoutDelimiter = nullptr;
352   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
353     switch (C->getRole()) {
354     case syntax::NodeRole::ListElement: {
355       if (ElementWithoutDelimiter) {
356         Children.push_back(ElementWithoutDelimiter);
357       }
358       ElementWithoutDelimiter = C;
359       break;
360     }
361     case syntax::NodeRole::ListDelimiter: {
362       Children.push_back(ElementWithoutDelimiter);
363       ElementWithoutDelimiter = nullptr;
364       break;
365     }
366     default:
367       llvm_unreachable("A list has only elements or delimiters.");
368     }
369   }
370 
371   switch (getTerminationKind()) {
372   case syntax::List::TerminationKind::Separated: {
373     Children.push_back(ElementWithoutDelimiter);
374     break;
375   }
376   case syntax::List::TerminationKind::Terminated:
377   case syntax::List::TerminationKind::MaybeTerminated: {
378     if (ElementWithoutDelimiter) {
379       Children.push_back(ElementWithoutDelimiter);
380     }
381     break;
382   }
383   }
384 
385   return Children;
386 }
387 
388 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
389   switch (this->getKind()) {
390   case NodeKind::NestedNameSpecifier:
391     return clang::tok::coloncolon;
392   case NodeKind::CallArguments:
393   case NodeKind::ParameterDeclarationList:
394     return clang::tok::comma;
395   default:
396     llvm_unreachable("This is not a subclass of List, thus "
397                      "getDelimiterTokenKind() cannot be called");
398   }
399 }
400 
401 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
402   switch (this->getKind()) {
403   case NodeKind::NestedNameSpecifier:
404     return TerminationKind::Terminated;
405   case NodeKind::CallArguments:
406   case NodeKind::ParameterDeclarationList:
407     return TerminationKind::Separated;
408   default:
409     llvm_unreachable("This is not a subclass of List, thus "
410                      "getTerminationKind() cannot be called");
411   }
412 }
413 
414 bool syntax::List::canBeEmpty() const {
415   switch (this->getKind()) {
416   case NodeKind::NestedNameSpecifier:
417     return false;
418   case NodeKind::CallArguments:
419     return true;
420   case NodeKind::ParameterDeclarationList:
421     return true;
422   default:
423     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
424                      "cannot be called");
425   }
426 }
427