1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/Casting.h"
14 #include <cassert>
15 
16 using namespace clang;
17 
18 namespace {
19 static void traverse(const syntax::Node *N,
20                      llvm::function_ref<void(const syntax::Node *)> Visit) {
21   if (auto *T = dyn_cast<syntax::Tree>(N)) {
22     for (auto *C = T->firstChild(); C; C = C->nextSibling())
23       traverse(C, Visit);
24   }
25   Visit(N);
26 }
27 static void traverse(syntax::Node *N,
28                      llvm::function_ref<void(syntax::Node *)> Visit) {
29   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
30     Visit(const_cast<syntax::Node *>(N));
31   });
32 }
33 } // namespace
34 
35 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
36                      const TokenBuffer &Tokens)
37     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
38 
39 const syntax::TokenBuffer &syntax::Arena::tokenBuffer() const { return Tokens; }
40 
41 std::pair<FileID, ArrayRef<syntax::Token>>
42 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
43   auto FID = SourceMgr.createFileID(std::move(Input));
44   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
45   assert(It.second && "duplicate FileID");
46   return {FID, It.first->second};
47 }
48 
49 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
50   assert(Tok != nullptr);
51 }
52 
53 bool syntax::Leaf::classof(const Node *N) {
54   return N->kind() == NodeKind::Leaf;
55 }
56 
57 syntax::Node::Node(NodeKind Kind)
58     : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
59       Role(0), Original(false), CanModify(false) {
60   this->setRole(NodeRole::Detached);
61 }
62 
63 bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; }
64 
65 void syntax::Node::setRole(NodeRole NR) {
66   this->Role = static_cast<unsigned>(NR);
67 }
68 
69 bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; }
70 
71 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
72   assert(Child->role() == NodeRole::Detached);
73   assert(Role != NodeRole::Detached);
74 
75   Child->setRole(Role);
76   prependChildLowLevel(Child);
77 }
78 
79 void syntax::Tree::prependChildLowLevel(Node *Child) {
80   assert(Child->Parent == nullptr);
81   assert(Child->NextSibling == nullptr);
82   assert(Child->role() != NodeRole::Detached);
83 
84   Child->Parent = this;
85   Child->NextSibling = this->FirstChild;
86   this->FirstChild = Child;
87 }
88 
89 void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
90                                              Node *New) {
91   assert(!BeforeBegin || BeforeBegin->Parent == this);
92 
93 #ifndef NDEBUG
94   for (auto *N = New; N; N = N->nextSibling()) {
95     assert(N->Parent == nullptr);
96     assert(N->role() != NodeRole::Detached && "Roles must be set");
97     // FIXME: sanity-check the role.
98   }
99 #endif
100 
101   // Detach old nodes.
102   for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling();
103        N != End;) {
104     auto *Next = N->NextSibling;
105 
106     N->setRole(NodeRole::Detached);
107     N->Parent = nullptr;
108     N->NextSibling = nullptr;
109     if (N->Original)
110       traverse(N, [&](Node *C) { C->Original = false; });
111 
112     N = Next;
113   }
114 
115   // Attach new nodes.
116   if (BeforeBegin)
117     BeforeBegin->NextSibling = New ? New : End;
118   else
119     FirstChild = New ? New : End;
120 
121   if (New) {
122     auto *Last = New;
123     for (auto *N = New; N != nullptr; N = N->nextSibling()) {
124       Last = N;
125       N->Parent = this;
126     }
127     Last->NextSibling = End;
128   }
129 
130   // Mark the node as modified.
131   for (auto *T = this; T && T->Original; T = T->Parent)
132     T->Original = false;
133 }
134 
135 namespace {
136 static void dumpTokens(raw_ostream &OS, ArrayRef<syntax::Token> Tokens,
137                        const SourceManager &SM) {
138   assert(!Tokens.empty());
139   bool First = true;
140   for (const auto &T : Tokens) {
141     if (!First)
142       OS << " ";
143     else
144       First = false;
145     // Handle 'eof' separately, calling text() on it produces an empty string.
146     if (T.kind() == tok::eof) {
147       OS << "<eof>";
148       continue;
149     }
150     OS << T.text(SM);
151   }
152 }
153 
154 static void dumpTree(raw_ostream &OS, const syntax::Node *N,
155                      const syntax::Arena &A, std::vector<bool> IndentMask) {
156   std::string Marks;
157   if (!N->isOriginal())
158     Marks += "M";
159   if (N->role() == syntax::NodeRole::Detached)
160     Marks += "*"; // FIXME: find a nice way to print other roles.
161   if (!N->canModify())
162     Marks += "I";
163   if (!Marks.empty())
164     OS << Marks << ": ";
165 
166   if (auto *L = dyn_cast<syntax::Leaf>(N)) {
167     dumpTokens(OS, *L->token(), A.sourceManager());
168     OS << "\n";
169     return;
170   }
171 
172   auto *T = cast<syntax::Tree>(N);
173   OS << T->kind() << "\n";
174 
175   for (auto It = T->firstChild(); It != nullptr; It = It->nextSibling()) {
176     for (bool Filled : IndentMask) {
177       if (Filled)
178         OS << "| ";
179       else
180         OS << "  ";
181     }
182     if (!It->nextSibling()) {
183       OS << "`-";
184       IndentMask.push_back(false);
185     } else {
186       OS << "|-";
187       IndentMask.push_back(true);
188     }
189     dumpTree(OS, It, A, IndentMask);
190     IndentMask.pop_back();
191   }
192 }
193 } // namespace
194 
195 std::string syntax::Node::dump(const Arena &A) const {
196   std::string Str;
197   llvm::raw_string_ostream OS(Str);
198   dumpTree(OS, this, A, /*IndentMask=*/{});
199   return std::move(OS.str());
200 }
201 
202 std::string syntax::Node::dumpTokens(const Arena &A) const {
203   std::string Storage;
204   llvm::raw_string_ostream OS(Storage);
205   traverse(this, [&](const syntax::Node *N) {
206     auto *L = dyn_cast<syntax::Leaf>(N);
207     if (!L)
208       return;
209     ::dumpTokens(OS, *L->token(), A.sourceManager());
210     OS << " ";
211   });
212   return OS.str();
213 }
214 
215 void syntax::Node::assertInvariants() const {
216 #ifndef NDEBUG
217   if (isDetached())
218     assert(parent() == nullptr);
219   else
220     assert(parent() != nullptr);
221 
222   auto *T = dyn_cast<Tree>(this);
223   if (!T)
224     return;
225   for (auto *C = T->firstChild(); C; C = C->nextSibling()) {
226     if (T->isOriginal())
227       assert(C->isOriginal());
228     assert(!C->isDetached());
229     assert(C->parent() == T);
230   }
231 #endif
232 }
233 
234 void syntax::Node::assertInvariantsRecursive() const {
235 #ifndef NDEBUG
236   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
237 #endif
238 }
239 
240 syntax::Leaf *syntax::Tree::firstLeaf() {
241   auto *T = this;
242   while (auto *C = T->firstChild()) {
243     if (auto *L = dyn_cast<syntax::Leaf>(C))
244       return L;
245     T = cast<syntax::Tree>(C);
246   }
247   return nullptr;
248 }
249 
250 syntax::Leaf *syntax::Tree::lastLeaf() {
251   auto *T = this;
252   while (auto *C = T->firstChild()) {
253     // Find the last child.
254     while (auto *Next = C->nextSibling())
255       C = Next;
256 
257     if (auto *L = dyn_cast<syntax::Leaf>(C))
258       return L;
259     T = cast<syntax::Tree>(C);
260   }
261   return nullptr;
262 }
263 
264 syntax::Node *syntax::Tree::findChild(NodeRole R) {
265   for (auto *C = FirstChild; C; C = C->nextSibling()) {
266     if (C->role() == R)
267       return C;
268   }
269   return nullptr;
270 }
271 
272 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
273 syntax::List::getElementsAsNodesAndDelimiters() {
274   if (!firstChild())
275     return {};
276 
277   auto children = std::vector<syntax::List::ElementAndDelimiter<Node>>();
278   syntax::Node *elementWithoutDelimiter = nullptr;
279   for (auto *C = firstChild(); C; C = C->nextSibling()) {
280     switch (C->role()) {
281     case syntax::NodeRole::List_element: {
282       if (elementWithoutDelimiter) {
283         children.push_back({elementWithoutDelimiter, nullptr});
284       }
285       elementWithoutDelimiter = C;
286       break;
287     }
288     case syntax::NodeRole::List_delimiter: {
289       children.push_back({elementWithoutDelimiter, cast<syntax::Leaf>(C)});
290       elementWithoutDelimiter = nullptr;
291       break;
292     }
293     default:
294       llvm_unreachable(
295           "A list can have only elements and delimiters as children.");
296     }
297   }
298 
299   switch (getTerminationKind()) {
300   case syntax::List::TerminationKind::Separated: {
301     children.push_back({elementWithoutDelimiter, nullptr});
302     break;
303   }
304   case syntax::List::TerminationKind::Terminated:
305   case syntax::List::TerminationKind::MaybeTerminated: {
306     if (elementWithoutDelimiter) {
307       children.push_back({elementWithoutDelimiter, nullptr});
308     }
309     break;
310   }
311   }
312 
313   return children;
314 }
315 
316 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
317 // ignoring delimiters
318 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
319   if (!firstChild())
320     return {};
321 
322   auto children = std::vector<syntax::Node *>();
323   syntax::Node *elementWithoutDelimiter = nullptr;
324   for (auto *C = firstChild(); C; C = C->nextSibling()) {
325     switch (C->role()) {
326     case syntax::NodeRole::List_element: {
327       if (elementWithoutDelimiter) {
328         children.push_back(elementWithoutDelimiter);
329       }
330       elementWithoutDelimiter = C;
331       break;
332     }
333     case syntax::NodeRole::List_delimiter: {
334       children.push_back(elementWithoutDelimiter);
335       elementWithoutDelimiter = nullptr;
336       break;
337     }
338     default:
339       llvm_unreachable("A list has only elements or delimiters.");
340     }
341   }
342 
343   switch (getTerminationKind()) {
344   case syntax::List::TerminationKind::Separated: {
345     children.push_back(elementWithoutDelimiter);
346     break;
347   }
348   case syntax::List::TerminationKind::Terminated:
349   case syntax::List::TerminationKind::MaybeTerminated: {
350     if (elementWithoutDelimiter) {
351       children.push_back(elementWithoutDelimiter);
352     }
353     break;
354   }
355   }
356 
357   return children;
358 }
359 
360 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() {
361   switch (this->kind()) {
362   case NodeKind::NestedNameSpecifier:
363     return clang::tok::coloncolon;
364   default:
365     llvm_unreachable("This is not a subclass of List, thus "
366                      "getDelimiterTokenKind() cannot be called");
367   }
368 }
369 
370 syntax::List::TerminationKind syntax::List::getTerminationKind() {
371   switch (this->kind()) {
372   case NodeKind::NestedNameSpecifier:
373     return TerminationKind::Terminated;
374   default:
375     llvm_unreachable("This is not a subclass of List, thus "
376                      "getTerminationKind() cannot be called");
377   }
378 }
379 
380 bool syntax::List::canBeEmpty() {
381   switch (this->kind()) {
382   case NodeKind::NestedNameSpecifier:
383     return false;
384   default:
385     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
386                      "cannot be called");
387   }
388 }
389