1 //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // These backends consume the definitions of Syntax Tree nodes.
12 // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13 //
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, BaseKind)
//   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
// both of which expand to
//   NODE(Kind, BaseKind)
// unless the including file defines them itself. This is similar to the lists
// for AST nodes such as AST/DeclNodes.inc.
18 //
19 // The -gen-clang-syntax-node-classes backend produces definitions for the
20 // syntax::Node subclasses (except those marked as External).
21 //
22 // In future, another backend will encode the structure of the various node
23 // types in tables so their invariants can be checked and enforced.
24 //
25 //===----------------------------------------------------------------------===//
26 #include "TableGenBackends.h"
27 
28 #include <deque>
29 
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Support/FormatVariadic.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 
36 namespace {
37 using llvm::formatv;
38 
39 // The class hierarchy of Node types.
40 // We assemble this in order to be able to define the NodeKind enum in a
41 // stable and useful way, where abstract Node subclasses correspond to ranges.
42 class Hierarchy {
43 public:
44   Hierarchy(const llvm::RecordKeeper &Records) {
45     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
46       add(T);
47     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
48       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
49         link(Derived, Base);
50     for (NodeType &N : AllTypes) {
51       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
52         return L->Record->getName() < R->Record->getName();
53       });
54       // Alternatives nodes must have subclasses, External nodes may do.
55       assert(N.Record->isSubClassOf("Alternatives") ||
56              N.Record->isSubClassOf("External") || N.Derived.empty());
57       assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
58     }
59   }
60 
61   struct NodeType {
62     const llvm::Record *Record = nullptr;
63     const NodeType *Base = nullptr;
64     std::vector<const NodeType *> Derived;
65     llvm::StringRef name() const { return Record->getName(); }
66   };
67 
68   NodeType &get(llvm::StringRef Name = "Node") {
69     auto NI = ByName.find(Name);
70     assert(NI != ByName.end() && "no such node");
71     return *NI->second;
72   }
73 
74   // Traverse the hierarchy in pre-order (base classes before derived).
75   void visit(llvm::function_ref<void(const NodeType &)> CB,
76              const NodeType *Start = nullptr) {
77     if (Start == nullptr)
78       Start = &get();
79     CB(*Start);
80     for (const NodeType *D : Start->Derived)
81       visit(CB, D);
82   }
83 
84 private:
85   void add(const llvm::Record *R) {
86     AllTypes.emplace_back();
87     AllTypes.back().Record = R;
88     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89     assert(Inserted && "Duplicate node name");
90     (void)Inserted;
91   }
92 
93   void link(const llvm::Record *Derived, const llvm::Record *Base) {
94     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95     assert(CN.Base == nullptr && "setting base twice");
96     PN.Derived.push_back(&CN);
97     CN.Base = &PN;
98   }
99 
100   std::deque<NodeType> AllTypes;
101   llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102 };
103 
104 const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
105   return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
106 }
107 const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
108   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
109 }
110 
111 } // namespace
112 
113 void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
114                                     llvm::raw_ostream &OS) {
115   llvm::emitSourceFileHeader("Syntax tree node list", OS);
116   Hierarchy H(Records);
117   OS << R"cpp(
118 #ifndef NODE
119 #define NODE(Kind, Base)
120 #endif
121 
122 #ifndef CONCRETE_NODE
123 #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
124 #endif
125 
126 #ifndef ABSTRACT_NODE
127 #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
128 #endif
129 
130 )cpp";
131   H.visit([&](const Hierarchy::NodeType &N) {
132     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
133     if (N.Base == nullptr)
134       return;
135     if (N.Derived.empty())
136       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
137     else
138       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
139                     N.Base->name(), firstConcrete(N).name(),
140                     lastConcrete(N).name());
141   });
142   OS << R"cpp(
143 #undef NODE
144 #undef CONCRETE_NODE
145 #undef ABSTRACT_NODE
146 )cpp";
147 }
148 
149 // Format a documentation string as a C++ comment.
150 // Trims leading whitespace handling since comments come from a TableGen file:
151 //    documentation = [{
152 //      This is a widget. Example:
153 //        widget.explode()
154 //    }];
155 // and should be formatted as:
156 //    /// This is a widget. Example:
157 //    ///   widget.explode()
158 // Leading and trailing whitespace lines are stripped.
159 // The indentation of the first line is stripped from all lines.
160 static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
161   Doc = Doc.rtrim();
162   llvm::StringRef Line;
163   while (Line.trim().empty() && !Doc.empty())
164     std::tie(Line, Doc) = Doc.split('\n');
165   llvm::StringRef Indent = Line.take_while(llvm::isSpace);
166   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
167     Line.consume_front(Indent);
168     OS << "/// " << Line << "\n";
169   }
170 }
171 
172 void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
173                                        llvm::raw_ostream &OS) {
174   llvm::emitSourceFileHeader("Syntax tree node list", OS);
175   Hierarchy H(Records);
176 
177   OS << "\n// Forward-declare node types so we don't have to carefully "
178         "sequence definitions.\n";
179   H.visit([&](const Hierarchy::NodeType &N) {
180     OS << "class " << N.name() << ";\n";
181   });
182 
183   OS << "\n// Node definitions\n\n";
184   H.visit([&](const Hierarchy::NodeType &N) {
185     if (N.Record->isSubClassOf("External"))
186       return;
187     printDoc(N.Record->getValueAsString("documentation"), OS);
188     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
189                   N.Derived.empty() ? " final" : "", N.Base->name());
190 
191     // Constructor.
192     if (N.Derived.empty())
193       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
194                     N.Base->name());
195     else
196       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
197                     N.name(), N.Base->name());
198 
199     // classof. FIXME: move definition inline once ~all nodes are generated.
200     OS << "  static bool classof(const Node *N);\n";
201 
202     OS << "};\n\n";
203   });
204 }
205