1454579e4SSam McCall //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2454579e4SSam McCall //
3454579e4SSam McCall // The LLVM Compiler Infrastructure
4454579e4SSam McCall //
5454579e4SSam McCall // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6454579e4SSam McCall // See https://llvm.org/LICENSE.txt for license information.
7454579e4SSam McCall // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8454579e4SSam McCall //
9454579e4SSam McCall //===----------------------------------------------------------------------===//
10454579e4SSam McCall //
11454579e4SSam McCall // These backends consume the definitions of Syntax Tree nodes.
12454579e4SSam McCall // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13454579e4SSam McCall //
14454579e4SSam McCall // The -gen-clang-syntax-node-list backend produces a .inc with macro calls
15454579e4SSam McCall // NODE(Kind, BaseKind)
16454579e4SSam McCall // ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
17454579e4SSam McCall // similar to those for AST nodes such as AST/DeclNodes.inc.
18454579e4SSam McCall //
1998aa0671SSam McCall // The -gen-clang-syntax-node-classes backend produces definitions for the
2098aa0671SSam McCall // syntax::Node subclasses (except those marked as External).
2198aa0671SSam McCall //
2298aa0671SSam McCall // In future, another backend will encode the structure of the various node
2398aa0671SSam McCall // types in tables so their invariants can be checked and enforced.
24454579e4SSam McCall //
25454579e4SSam McCall //===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include <deque>

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
35454579e4SSam McCall
36454579e4SSam McCall namespace {
3798aa0671SSam McCall using llvm::formatv;
38454579e4SSam McCall
39454579e4SSam McCall // The class hierarchy of Node types.
40454579e4SSam McCall // We assemble this in order to be able to define the NodeKind enum in a
41454579e4SSam McCall // stable and useful way, where abstract Node subclasses correspond to ranges.
42454579e4SSam McCall class Hierarchy {
43454579e4SSam McCall public:
Hierarchy(const llvm::RecordKeeper & Records)44454579e4SSam McCall Hierarchy(const llvm::RecordKeeper &Records) {
45454579e4SSam McCall for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
46454579e4SSam McCall add(T);
47454579e4SSam McCall for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
48454579e4SSam McCall if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
49454579e4SSam McCall link(Derived, Base);
5098aa0671SSam McCall for (NodeType &N : AllTypes) {
51454579e4SSam McCall llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
52454579e4SSam McCall return L->Record->getName() < R->Record->getName();
53454579e4SSam McCall });
5498aa0671SSam McCall // Alternatives nodes must have subclasses, External nodes may do.
5598aa0671SSam McCall assert(N.Record->isSubClassOf("Alternatives") ||
5698aa0671SSam McCall N.Record->isSubClassOf("External") || N.Derived.empty());
5798aa0671SSam McCall assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
5898aa0671SSam McCall }
59454579e4SSam McCall }
60454579e4SSam McCall
61454579e4SSam McCall struct NodeType {
62454579e4SSam McCall const llvm::Record *Record = nullptr;
63454579e4SSam McCall const NodeType *Base = nullptr;
64454579e4SSam McCall std::vector<const NodeType *> Derived;
name__anonf5ef44bb0111::Hierarchy::NodeType65454579e4SSam McCall llvm::StringRef name() const { return Record->getName(); }
66454579e4SSam McCall };
67454579e4SSam McCall
get(llvm::StringRef Name="Node")68454579e4SSam McCall NodeType &get(llvm::StringRef Name = "Node") {
69454579e4SSam McCall auto NI = ByName.find(Name);
70454579e4SSam McCall assert(NI != ByName.end() && "no such node");
71454579e4SSam McCall return *NI->second;
72454579e4SSam McCall }
73454579e4SSam McCall
7498aa0671SSam McCall // Traverse the hierarchy in pre-order (base classes before derived).
visit(llvm::function_ref<void (const NodeType &)> CB,const NodeType * Start=nullptr)7598aa0671SSam McCall void visit(llvm::function_ref<void(const NodeType &)> CB,
7698aa0671SSam McCall const NodeType *Start = nullptr) {
7798aa0671SSam McCall if (Start == nullptr)
7898aa0671SSam McCall Start = &get();
7998aa0671SSam McCall CB(*Start);
8098aa0671SSam McCall for (const NodeType *D : Start->Derived)
8198aa0671SSam McCall visit(CB, D);
8298aa0671SSam McCall }
8398aa0671SSam McCall
84454579e4SSam McCall private:
add(const llvm::Record * R)85454579e4SSam McCall void add(const llvm::Record *R) {
86454579e4SSam McCall AllTypes.emplace_back();
87454579e4SSam McCall AllTypes.back().Record = R;
88454579e4SSam McCall bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89454579e4SSam McCall assert(Inserted && "Duplicate node name");
90454579e4SSam McCall (void)Inserted;
91454579e4SSam McCall }
92454579e4SSam McCall
link(const llvm::Record * Derived,const llvm::Record * Base)93454579e4SSam McCall void link(const llvm::Record *Derived, const llvm::Record *Base) {
94454579e4SSam McCall auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95454579e4SSam McCall assert(CN.Base == nullptr && "setting base twice");
96454579e4SSam McCall PN.Derived.push_back(&CN);
97454579e4SSam McCall CN.Base = &PN;
98454579e4SSam McCall }
99454579e4SSam McCall
100454579e4SSam McCall std::deque<NodeType> AllTypes;
101454579e4SSam McCall llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102454579e4SSam McCall };
103454579e4SSam McCall
// Returns the node reached from N by repeatedly stepping to the first
// subclass until a node without subclasses is found (N itself if it has none).
const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Current = &N;
  while (!Current->Derived.empty())
    Current = Current->Derived.front();
  return *Current;
}
// Returns the node reached from N by repeatedly stepping to the last
// subclass until a node without subclasses is found (N itself if it has none).
const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Current = &N;
  while (!Current->Derived.empty())
    Current = Current->Derived.back();
  return *Current;
}
110454579e4SSam McCall
111*ea4d24c8SSam McCall struct SyntaxConstraint {
SyntaxConstraint__anonf5ef44bb0111::SyntaxConstraint112*ea4d24c8SSam McCall SyntaxConstraint(const llvm::Record &R) {
113*ea4d24c8SSam McCall if (R.isSubClassOf("Optional")) {
114*ea4d24c8SSam McCall *this = SyntaxConstraint(*R.getValueAsDef("inner"));
115*ea4d24c8SSam McCall } else if (R.isSubClassOf("AnyToken")) {
116*ea4d24c8SSam McCall NodeType = "Leaf";
117*ea4d24c8SSam McCall } else if (R.isSubClassOf("NodeType")) {
118*ea4d24c8SSam McCall NodeType = R.getName().str();
119*ea4d24c8SSam McCall } else {
120*ea4d24c8SSam McCall assert(false && "Unhandled Syntax kind");
121*ea4d24c8SSam McCall }
122*ea4d24c8SSam McCall }
123*ea4d24c8SSam McCall
124*ea4d24c8SSam McCall std::string NodeType;
125*ea4d24c8SSam McCall // optional and leaf types also go here, once we want to use them.
126*ea4d24c8SSam McCall };
127*ea4d24c8SSam McCall
128454579e4SSam McCall } // namespace
129454579e4SSam McCall
EmitClangSyntaxNodeList(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)130454579e4SSam McCall void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131454579e4SSam McCall llvm::raw_ostream &OS) {
132454579e4SSam McCall llvm::emitSourceFileHeader("Syntax tree node list", OS);
13398aa0671SSam McCall Hierarchy H(Records);
134454579e4SSam McCall OS << R"cpp(
135454579e4SSam McCall #ifndef NODE
136454579e4SSam McCall #define NODE(Kind, Base)
137454579e4SSam McCall #endif
138454579e4SSam McCall
139454579e4SSam McCall #ifndef CONCRETE_NODE
140454579e4SSam McCall #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141454579e4SSam McCall #endif
142454579e4SSam McCall
143454579e4SSam McCall #ifndef ABSTRACT_NODE
144454579e4SSam McCall #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145454579e4SSam McCall #endif
146454579e4SSam McCall
147454579e4SSam McCall )cpp";
14898aa0671SSam McCall H.visit([&](const Hierarchy::NodeType &N) {
14998aa0671SSam McCall // Don't emit ABSTRACT_NODE for node itself, which has no parent.
15098aa0671SSam McCall if (N.Base == nullptr)
15198aa0671SSam McCall return;
15298aa0671SSam McCall if (N.Derived.empty())
15398aa0671SSam McCall OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
15498aa0671SSam McCall else
15598aa0671SSam McCall OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
15698aa0671SSam McCall N.Base->name(), firstConcrete(N).name(),
15798aa0671SSam McCall lastConcrete(N).name());
15898aa0671SSam McCall });
159454579e4SSam McCall OS << R"cpp(
160454579e4SSam McCall #undef NODE
161454579e4SSam McCall #undef CONCRETE_NODE
162454579e4SSam McCall #undef ABSTRACT_NODE
163454579e4SSam McCall )cpp";
164454579e4SSam McCall }
16598aa0671SSam McCall
16698aa0671SSam McCall // Format a documentation string as a C++ comment.
16798aa0671SSam McCall // Trims leading whitespace handling since comments come from a TableGen file:
16898aa0671SSam McCall // documentation = [{
16998aa0671SSam McCall // This is a widget. Example:
17098aa0671SSam McCall // widget.explode()
17198aa0671SSam McCall // }];
17298aa0671SSam McCall // and should be formatted as:
17398aa0671SSam McCall // /// This is a widget. Example:
17498aa0671SSam McCall // /// widget.explode()
17598aa0671SSam McCall // Leading and trailing whitespace lines are stripped.
17698aa0671SSam McCall // The indentation of the first line is stripped from all lines.
printDoc(llvm::StringRef Doc,llvm::raw_ostream & OS)17798aa0671SSam McCall static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
17898aa0671SSam McCall Doc = Doc.rtrim();
17998aa0671SSam McCall llvm::StringRef Line;
18098aa0671SSam McCall while (Line.trim().empty() && !Doc.empty())
18198aa0671SSam McCall std::tie(Line, Doc) = Doc.split('\n');
18298aa0671SSam McCall llvm::StringRef Indent = Line.take_while(llvm::isSpace);
18398aa0671SSam McCall for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
18498aa0671SSam McCall Line.consume_front(Indent);
18598aa0671SSam McCall OS << "/// " << Line << "\n";
18698aa0671SSam McCall }
18798aa0671SSam McCall }
18898aa0671SSam McCall
EmitClangSyntaxNodeClasses(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)18998aa0671SSam McCall void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
19098aa0671SSam McCall llvm::raw_ostream &OS) {
19198aa0671SSam McCall llvm::emitSourceFileHeader("Syntax tree node list", OS);
19298aa0671SSam McCall Hierarchy H(Records);
19398aa0671SSam McCall
19498aa0671SSam McCall OS << "\n// Forward-declare node types so we don't have to carefully "
19598aa0671SSam McCall "sequence definitions.\n";
19698aa0671SSam McCall H.visit([&](const Hierarchy::NodeType &N) {
19798aa0671SSam McCall OS << "class " << N.name() << ";\n";
19898aa0671SSam McCall });
19998aa0671SSam McCall
20098aa0671SSam McCall OS << "\n// Node definitions\n\n";
20198aa0671SSam McCall H.visit([&](const Hierarchy::NodeType &N) {
20298aa0671SSam McCall if (N.Record->isSubClassOf("External"))
20398aa0671SSam McCall return;
20498aa0671SSam McCall printDoc(N.Record->getValueAsString("documentation"), OS);
20598aa0671SSam McCall OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
20698aa0671SSam McCall N.Derived.empty() ? " final" : "", N.Base->name());
20798aa0671SSam McCall
20898aa0671SSam McCall // Constructor.
20998aa0671SSam McCall if (N.Derived.empty())
21098aa0671SSam McCall OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
21198aa0671SSam McCall N.Base->name());
21298aa0671SSam McCall else
21398aa0671SSam McCall OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
21498aa0671SSam McCall N.name(), N.Base->name());
21598aa0671SSam McCall
216*ea4d24c8SSam McCall if (N.Record->isSubClassOf("Sequence")) {
217*ea4d24c8SSam McCall // Getters for sequence elements.
218*ea4d24c8SSam McCall for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
219*ea4d24c8SSam McCall assert(C->isSubClassOf("Role"));
220*ea4d24c8SSam McCall llvm::StringRef Role = C->getValueAsString("role");
221*ea4d24c8SSam McCall SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
222*ea4d24c8SSam McCall for (const char *Const : {"", "const "})
223*ea4d24c8SSam McCall OS << formatv(
224*ea4d24c8SSam McCall " {2}{1} *get{0}() {2} {{\n"
225*ea4d24c8SSam McCall " return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226*ea4d24c8SSam McCall " }\n",
227*ea4d24c8SSam McCall Role, Constraint.NodeType, Const);
228*ea4d24c8SSam McCall }
229*ea4d24c8SSam McCall }
230*ea4d24c8SSam McCall
23198aa0671SSam McCall // classof. FIXME: move definition inline once ~all nodes are generated.
23298aa0671SSam McCall OS << " static bool classof(const Node *N);\n";
23398aa0671SSam McCall
23498aa0671SSam McCall OS << "};\n\n";
23598aa0671SSam McCall });
23698aa0671SSam McCall }
237