1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
23       traverse(C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
40   return Tokens;
41 }
42 
43 std::pair<FileID, ArrayRef<syntax::Token>>
44 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
45   auto FID = SourceMgr.createFileID(std::move(Input));
46   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
47   assert(It.second && "duplicate FileID");
48   return {FID, It.first->second};
49 }
50 
51 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
52   assert(Tok != nullptr);
53 }
54 
55 bool syntax::Leaf::classof(const Node *N) {
56   return N->getKind() == NodeKind::Leaf;
57 }
58 
59 syntax::Node::Node(NodeKind Kind)
60     : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
61       Role(0), Original(false), CanModify(false) {
62   this->setRole(NodeRole::Detached);
63 }
64 
65 bool syntax::Node::isDetached() const {
66   return getRole() == NodeRole::Detached;
67 }
68 
69 void syntax::Node::setRole(NodeRole NR) {
70   this->Role = static_cast<unsigned>(NR);
71 }
72 
73 bool syntax::Tree::classof(const Node *N) {
74   return N->getKind() > NodeKind::Leaf;
75 }
76 
77 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
78   assert(Child->getRole() == NodeRole::Detached);
79   assert(Role != NodeRole::Detached);
80 
81   Child->setRole(Role);
82   prependChildLowLevel(Child);
83 }
84 
85 void syntax::Tree::prependChildLowLevel(Node *Child) {
86   assert(Child->Parent == nullptr);
87   assert(Child->NextSibling == nullptr);
88   assert(Child->getRole() != NodeRole::Detached);
89 
90   Child->Parent = this;
91   Child->NextSibling = this->FirstChild;
92   this->FirstChild = Child;
93 }
94 
95 void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
96                                              Node *New) {
97   assert(!BeforeBegin || BeforeBegin->Parent == this);
98 
99 #ifndef NDEBUG
100   for (auto *N = New; N; N = N->getNextSibling()) {
101     assert(N->Parent == nullptr);
102     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
103     // FIXME: sanity-check the role.
104   }
105 #endif
106 
107   // Detach old nodes.
108   for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->getNextSibling();
109        N != End;) {
110     auto *Next = N->NextSibling;
111 
112     N->setRole(NodeRole::Detached);
113     N->Parent = nullptr;
114     N->NextSibling = nullptr;
115     if (N->Original)
116       traverse(N, [&](Node *C) { C->Original = false; });
117 
118     N = Next;
119   }
120 
121   // Attach new nodes.
122   if (BeforeBegin)
123     BeforeBegin->NextSibling = New ? New : End;
124   else
125     FirstChild = New ? New : End;
126 
127   if (New) {
128     auto *Last = New;
129     for (auto *N = New; N != nullptr; N = N->getNextSibling()) {
130       Last = N;
131       N->Parent = this;
132     }
133     Last->NextSibling = End;
134   }
135 
136   // Mark the node as modified.
137   for (auto *T = this; T && T->Original; T = T->Parent)
138     T->Original = false;
139 }
140 
141 namespace {
142 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
143                      const SourceManager &SM) {
144   assert(L);
145   const auto *Token = L->getToken();
146   assert(Token);
147   // Handle 'eof' separately, calling text() on it produces an empty string.
148   if (Token->kind() == tok::eof)
149     OS << "<eof>";
150   else
151     OS << Token->text(SM);
152 }
153 
154 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
155                      const SourceManager &SM, std::vector<bool> IndentMask) {
156   auto dumpExtraInfo = [&OS](const syntax::Node *N) {
157     if (N->getRole() != syntax::NodeRole::Unknown)
158       OS << " " << N->getRole();
159     if (!N->isOriginal())
160       OS << " synthesized";
161     if (!N->canModify())
162       OS << " unmodifiable";
163   };
164 
165   assert(N);
166   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
167     OS << "'";
168     dumpLeaf(OS, L, SM);
169     OS << "'";
170     dumpExtraInfo(N);
171     OS << "\n";
172     return;
173   }
174 
175   const auto *T = cast<syntax::Tree>(N);
176   OS << T->getKind();
177   dumpExtraInfo(N);
178   OS << "\n";
179 
180   for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
181     for (bool Filled : IndentMask) {
182       if (Filled)
183         OS << "| ";
184       else
185         OS << "  ";
186     }
187     if (!It->getNextSibling()) {
188       OS << "`-";
189       IndentMask.push_back(false);
190     } else {
191       OS << "|-";
192       IndentMask.push_back(true);
193     }
194     dumpNode(OS, It, SM, IndentMask);
195     IndentMask.pop_back();
196   }
197 }
198 } // namespace
199 
200 std::string syntax::Node::dump(const SourceManager &SM) const {
201   std::string Str;
202   llvm::raw_string_ostream OS(Str);
203   dumpNode(OS, this, SM, /*IndentMask=*/{});
204   return std::move(OS.str());
205 }
206 
207 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
208   std::string Storage;
209   llvm::raw_string_ostream OS(Storage);
210   traverse(this, [&](const syntax::Node *N) {
211     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
212       dumpLeaf(OS, L, SM);
213       OS << " ";
214     }
215   });
216   return OS.str();
217 }
218 
219 void syntax::Node::assertInvariants() const {
220 #ifndef NDEBUG
221   if (isDetached())
222     assert(getParent() == nullptr);
223   else
224     assert(getParent() != nullptr);
225 
226   auto *T = dyn_cast<Tree>(this);
227   if (!T)
228     return;
229   for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling()) {
230     if (T->isOriginal())
231       assert(C->isOriginal());
232     assert(!C->isDetached());
233     assert(C->getParent() == T);
234   }
235 #endif
236 }
237 
238 void syntax::Node::assertInvariantsRecursive() const {
239 #ifndef NDEBUG
240   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
241 #endif
242 }
243 
244 syntax::Leaf *syntax::Tree::findFirstLeaf() {
245   auto *T = this;
246   while (auto *C = T->getFirstChild()) {
247     if (auto *L = dyn_cast<syntax::Leaf>(C))
248       return L;
249     T = cast<syntax::Tree>(C);
250   }
251   return nullptr;
252 }
253 
254 syntax::Leaf *syntax::Tree::findLastLeaf() {
255   auto *T = this;
256   while (auto *C = T->getFirstChild()) {
257     // Find the last child.
258     while (auto *Next = C->getNextSibling())
259       C = Next;
260 
261     if (auto *L = dyn_cast<syntax::Leaf>(C))
262       return L;
263     T = cast<syntax::Tree>(C);
264   }
265   return nullptr;
266 }
267 
268 syntax::Node *syntax::Tree::findChild(NodeRole R) {
269   for (auto *C = FirstChild; C; C = C->getNextSibling()) {
270     if (C->getRole() == R)
271       return C;
272   }
273   return nullptr;
274 }
275 
276 bool classof(const syntax::Node *N) {
277   switch (N->getKind()) {
278   case syntax::NodeKind::NestedNameSpecifier:
279   case syntax::NodeKind::CallArguments:
280   case syntax::NodeKind::ParameterDeclarationList:
281     return true;
282   default:
283     return false;
284   }
285 }
286 
287 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
288 syntax::List::getElementsAsNodesAndDelimiters() {
289   if (!getFirstChild())
290     return {};
291 
292   auto children = std::vector<syntax::List::ElementAndDelimiter<Node>>();
293   syntax::Node *elementWithoutDelimiter = nullptr;
294   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
295     switch (C->getRole()) {
296     case syntax::NodeRole::ListElement: {
297       if (elementWithoutDelimiter) {
298         children.push_back({elementWithoutDelimiter, nullptr});
299       }
300       elementWithoutDelimiter = C;
301       break;
302     }
303     case syntax::NodeRole::ListDelimiter: {
304       children.push_back({elementWithoutDelimiter, cast<syntax::Leaf>(C)});
305       elementWithoutDelimiter = nullptr;
306       break;
307     }
308     default:
309       llvm_unreachable(
310           "A list can have only elements and delimiters as children.");
311     }
312   }
313 
314   switch (getTerminationKind()) {
315   case syntax::List::TerminationKind::Separated: {
316     children.push_back({elementWithoutDelimiter, nullptr});
317     break;
318   }
319   case syntax::List::TerminationKind::Terminated:
320   case syntax::List::TerminationKind::MaybeTerminated: {
321     if (elementWithoutDelimiter) {
322       children.push_back({elementWithoutDelimiter, nullptr});
323     }
324     break;
325   }
326   }
327 
328   return children;
329 }
330 
331 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
332 // ignoring delimiters
333 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
334   if (!getFirstChild())
335     return {};
336 
337   auto children = std::vector<syntax::Node *>();
338   syntax::Node *elementWithoutDelimiter = nullptr;
339   for (auto *C = getFirstChild(); C; C = C->getNextSibling()) {
340     switch (C->getRole()) {
341     case syntax::NodeRole::ListElement: {
342       if (elementWithoutDelimiter) {
343         children.push_back(elementWithoutDelimiter);
344       }
345       elementWithoutDelimiter = C;
346       break;
347     }
348     case syntax::NodeRole::ListDelimiter: {
349       children.push_back(elementWithoutDelimiter);
350       elementWithoutDelimiter = nullptr;
351       break;
352     }
353     default:
354       llvm_unreachable("A list has only elements or delimiters.");
355     }
356   }
357 
358   switch (getTerminationKind()) {
359   case syntax::List::TerminationKind::Separated: {
360     children.push_back(elementWithoutDelimiter);
361     break;
362   }
363   case syntax::List::TerminationKind::Terminated:
364   case syntax::List::TerminationKind::MaybeTerminated: {
365     if (elementWithoutDelimiter) {
366       children.push_back(elementWithoutDelimiter);
367     }
368     break;
369   }
370   }
371 
372   return children;
373 }
374 
375 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() {
376   switch (this->getKind()) {
377   case NodeKind::NestedNameSpecifier:
378     return clang::tok::coloncolon;
379   case NodeKind::CallArguments:
380   case NodeKind::ParameterDeclarationList:
381     return clang::tok::comma;
382   default:
383     llvm_unreachable("This is not a subclass of List, thus "
384                      "getDelimiterTokenKind() cannot be called");
385   }
386 }
387 
388 syntax::List::TerminationKind syntax::List::getTerminationKind() {
389   switch (this->getKind()) {
390   case NodeKind::NestedNameSpecifier:
391     return TerminationKind::Terminated;
392   case NodeKind::CallArguments:
393   case NodeKind::ParameterDeclarationList:
394     return TerminationKind::Separated;
395   default:
396     llvm_unreachable("This is not a subclass of List, thus "
397                      "getTerminationKind() cannot be called");
398   }
399 }
400 
401 bool syntax::List::canBeEmpty() {
402   switch (this->getKind()) {
403   case NodeKind::NestedNameSpecifier:
404     return false;
405   case NodeKind::CallArguments:
406     return true;
407   case NodeKind::ParameterDeclarationList:
408     return true;
409   default:
410     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
411                      "cannot be called");
412   }
413 }
414