//===-- HTMLForest.cpp - browser-based parse forest explorer --------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The plain text forest node dump (clang-pseudo -print-forest) is useful but
11 // hard to reconcile with the code being examined, especially when it is large.
12 //
13 // HTMLForest produces a self-contained HTML file containing both the code and
14 // the forest representation, linking them interactively with javascript.
15 // At any given time, a single parse tree is shown (ambiguities resolved).
16 // The user can switch between ambiguous alternatives.
17 //
18 // +-------+---------------+
19 // |       |        +-----+|
20 // | #tree |  #code |#info||
21 // |       |        +-----+|
22 // |       |               |
23 // +-------+---------------+
24 //
25 // #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest.
26 // (It is a simple tree, not a DAG, because ambiguities have been resolved).
27 // Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER).
28 //
29 // #code is the source code, annotated with <span>s marking the node ranges.
30 // These spans are usually invisible (exception: ambiguities are marked), but
31 // they are used to show and change the selection.
32 //
33 // #info is a floating box that shows details of the currently selected node:
34 //  - rule (for sequence nodes). Abbreviated rules are also shown.
35 //  - alternatives (for ambiguous nodes). The user can choose an alternative.
36 //  - ancestors. The parent nodes show how this node fits in translation-unit.
37 //
38 // There are two types of 'active' node:
39 //  - *highlight* is what the cursor is over, and is colored blue.
40 //    Near ancestors are shaded faintly (onion-skin) to show local structure.
41 //  - *selection* is set by clicking.
42 //    The #info box shows the selection, and selected nodes have a dashed ring.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "clang-pseudo/Forest.h"
47 #include "clang-pseudo/grammar/Grammar.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/JSON.h"
50 #include "llvm/Support/raw_ostream.h"
51 namespace clang {
52 namespace pseudo {
53 namespace {
54 
55 // Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc
56 #include "HTMLForestResources.inc"
57 
58 struct Writer {
59   llvm::raw_ostream &Out;
60   const Grammar &G;
61   const ForestNode &Root;
62   const TokenStream &Stream;
63 
writeclang::pseudo::__anondec53d070111::Writer64   void write() {
65     Out << "<!doctype html>\n";
66     tag("html", [&] {
67       tag("head", [&] {
68         tag("title", [&] { Out << "HTMLForest"; });
69         tag("script", [&] { Out << HTMLForest_js; });
70         tag("style", [&] { Out << HTMLForest_css; });
71         tag("script", [&] {
72           Out << "var forest=";
73           writeForestJSON();
74           Out << ";";
75         });
76         tag("pre id='hidden-code' hidden", [&] { writeCode(); });
77       });
78       tag("body", [&] { Out << HTMLForest_html; });
79     });
80   }
81 
82   void writeCode();
83   void writeForestJSON();
tagclang::pseudo::__anondec53d070111::Writer84   void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) {
85     Out << "<" << Opener << ">";
86     Body();
87     Out << "</" << Opener.split(' ').first << ">\n";
88   }
89 };
90 
writeCode()91 void Writer::writeCode() {
92   // This loop (whitespace logic) is cribbed from TokenStream::Print.
93   bool FirstToken = true;
94   unsigned LastLine = -1;
95   StringRef LastText;
96   for (const auto &T : Stream.tokens()) {
97     StringRef Text = T.text();
98     if (FirstToken) {
99       FirstToken = false;
100     } else if (T.Line == LastLine) {
101       if (LastText.data() + LastText.size() != Text.data())
102         Out << ' ';
103     } else {
104       Out << " \n"; // Extra space aids selection.
105       Out.indent(T.Indent);
106     }
107     Out << "<span class='token' id='t" << Stream.index(T) << "'>";
108     llvm::printHTMLEscaped(Text, Out);
109     Out << "</span>";
110     LastLine = T.Line;
111     LastText = Text;
112   }
113   if (!FirstToken)
114     Out << '\n';
115 }
116 
117 // Writes a JSON array of forest nodes. Items are e.g.:
118 //   {kind:'sequence', symbol:'compound-stmt', children:[5,8,33],
119 //   rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'}
120 //   {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3}
121 //   {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'}
writeForestJSON()122 void Writer::writeForestJSON() {
123   // This is the flat array of nodes: the index into this array is the node ID.
124   std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence;
125   llvm::DenseMap<const ForestNode *, unsigned> Index;
126   auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned {
127     auto R = Index.try_emplace(N, Sequence.size());
128     if (R.second)
129       Sequence.push_back({N, End});
130     return R.first->second;
131   };
132   AssignID(&Root, Stream.tokens().size());
133   auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); };
134 
135   llvm::json::OStream Out(this->Out, 2);
136   Out.array([&] {
137     for (unsigned I = 0; I < Sequence.size(); ++I) {
138       const ForestNode *N = Sequence[I].first;
139       Token::Index End = Sequence[I].second;
140       Out.object([&] {
141         Out.attribute("symbol", G.symbolName(N->symbol()));
142         switch (N->kind()) {
143         case ForestNode::Terminal:
144           Out.attribute("kind", "terminal");
145           Out.attribute("token", TokenID(N->startTokenIndex()));
146           break;
147         case ForestNode::Sequence:
148           Out.attribute("kind", "sequence");
149           Out.attribute("rule", G.dumpRule(N->rule()));
150           break;
151         case ForestNode::Ambiguous:
152           Out.attribute("kind", "ambiguous");
153           Out.attribute("selected", AssignID(N->children().front(), End));
154           break;
155         case ForestNode::Opaque:
156           Out.attribute("kind", "opaque");
157           Out.attribute("firstToken", TokenID(N->startTokenIndex()));
158           // [firstToken, lastToken] is a closed range.
159           // If empty, lastToken is omitted.
160           if (N->startTokenIndex() != End)
161             Out.attribute("lastToken", TokenID(End - 1));
162           break;
163         }
164         auto Children = N->children();
165         if (!Children.empty())
166           Out.attributeArray("children", [&] {
167             for (unsigned I = 0; I < Children.size(); ++I)
168               Out.value(AssignID(Children[I],
169                                  I + 1 == Children.size()
170                                      ? End
171                                      : Children[I + 1]->startTokenIndex()));
172           });
173       });
174     }
175   });
176 }
177 
178 } // namespace
179 
180 // We only accept the derived stream here.
181 // FIXME: allow the original stream instead?
writeHTMLForest(llvm::raw_ostream & OS,const Grammar & G,const ForestNode & Root,const TokenStream & Stream)182 void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G,
183                      const ForestNode &Root, const TokenStream &Stream) {
184   Writer{OS, G, Root, Stream}.write();
185 }
186 
187 } // namespace pseudo
188 } // namespace clang
189