1 //===-- HTMLForest.cpp - browser-based parse forest explorer 2 //---------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The plain text forest node dump (clang-pseudo -print-forest) is useful but 11 // hard to reconcile with the code being examined, especially when it is large. 12 // 13 // HTMLForest produces a self-contained HTML file containing both the code and 14 // the forest representation, linking them interactively with javascript. 15 // At any given time, a single parse tree is shown (ambiguities resolved). 16 // The user can switch between ambiguous alternatives. 17 // 18 // +-------+---------------+ 19 // | | +-----+| 20 // | #tree | #code |#info|| 21 // | | +-----+| 22 // | | | 23 // +-------+---------------+ 24 // 25 // #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest. 26 // (It is a simple tree, not a DAG, because ambiguities have been resolved). 27 // Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER). 28 // 29 // #code is the source code, annotated with <span>s marking the node ranges. 30 // These spans are usually invisible (exception: ambiguities are marked), but 31 // they are used to show and change the selection. 32 // 33 // #info is a floating box that shows details of the currently selected node: 34 // - rule (for sequence nodes). Abbreviated rules are also shown. 35 // - alternatives (for ambiguous nodes). The user can choose an alternative. 36 // - ancestors. The parent nodes show how this node fits in translation-unit. 37 // 38 // There are two types of 'active' node: 39 // - *highlight* is what the cursor is over, and is colored blue. 40 // Near ancestors are shaded faintly (onion-skin) to show local structure. 41 // - *selection* is set by clicking. 42 // The #info box shows the selection, and selected nodes have a dashed ring. 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "clang-pseudo/Forest.h" 47 #include "clang-pseudo/grammar/Grammar.h" 48 #include "llvm/ADT/StringExtras.h" 49 #include "llvm/Support/JSON.h" 50 #include "llvm/Support/raw_ostream.h" 51 namespace clang { 52 namespace pseudo { 53 namespace { 54 55 // Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc 56 #include "HTMLForestResources.inc" 57 58 struct Writer { 59 llvm::raw_ostream &Out; 60 const Grammar &G; 61 const ForestNode &Root; 62 const TokenStream &Stream; 63 64 void write() { 65 Out << "<!doctype html>\n"; 66 tag("html", [&] { 67 tag("head", [&] { 68 tag("title", [&] { Out << "HTMLForest"; }); 69 tag("script", [&] { Out << HTMLForest_js; }); 70 tag("style", [&] { Out << HTMLForest_css; }); 71 tag("script", [&] { 72 Out << "var forest="; 73 writeForestJSON(); 74 Out << ";"; 75 }); 76 tag("pre id='hidden-code' hidden", [&] { writeCode(); }); 77 }); 78 tag("body", [&] { Out << HTMLForest_html; }); 79 }); 80 } 81 82 void writeCode(); 83 void writeForestJSON(); 84 void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) { 85 Out << "<" << Opener << ">"; 86 Body(); 87 Out << "</" << Opener.split(' ').first << ">\n"; 88 } 89 }; 90 91 void Writer::writeCode() { 92 // This loop (whitespace logic) is cribbed from TokenStream::Print. 93 bool FirstToken = true; 94 unsigned LastLine = -1; 95 StringRef LastText; 96 for (const auto &T : Stream.tokens()) { 97 StringRef Text = T.text(); 98 if (FirstToken) { 99 FirstToken = false; 100 } else if (T.Line == LastLine) { 101 if (LastText.data() + LastText.size() != Text.data()) 102 Out << ' '; 103 } else { 104 Out << " \n"; // Extra space aids selection. 105 Out.indent(T.Indent); 106 } 107 Out << "<span class='token' id='t" << Stream.index(T) << "'>"; 108 llvm::printHTMLEscaped(Text, Out); 109 Out << "</span>"; 110 LastLine = T.Line; 111 LastText = Text; 112 } 113 if (!FirstToken) 114 Out << '\n'; 115 } 116 117 // Writes a JSON array of forest nodes. Items are e.g.: 118 // {kind:'sequence', symbol:'compound-stmt', children:[5,8,33], 119 // rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'} 120 // {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3} 121 // {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'} 122 void Writer::writeForestJSON() { 123 // This is the flat array of nodes: the index into this array is the node ID. 124 std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence; 125 llvm::DenseMap<const ForestNode *, unsigned> Index; 126 auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned { 127 auto R = Index.try_emplace(N, Sequence.size()); 128 if (R.second) 129 Sequence.push_back({N, End}); 130 return R.first->second; 131 }; 132 AssignID(&Root, Stream.tokens().size()); 133 auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); }; 134 135 llvm::json::OStream Out(this->Out, 2); 136 Out.array([&] { 137 for (unsigned I = 0; I < Sequence.size(); ++I) { 138 const ForestNode *N = Sequence[I].first; 139 Token::Index End = Sequence[I].second; 140 Out.object([&] { 141 Out.attribute("symbol", G.symbolName(N->symbol())); 142 switch (N->kind()) { 143 case ForestNode::Terminal: 144 Out.attribute("kind", "terminal"); 145 Out.attribute("token", TokenID(N->startTokenIndex())); 146 break; 147 case ForestNode::Sequence: 148 Out.attribute("kind", "sequence"); 149 Out.attribute("rule", G.dumpRule(N->rule())); 150 break; 151 case ForestNode::Ambiguous: 152 Out.attribute("kind", "ambiguous"); 153 Out.attribute("selected", AssignID(N->children().front(), End)); 154 break; 155 case ForestNode::Opaque: 156 Out.attribute("kind", "opaque"); 157 Out.attribute("firstToken", TokenID(N->startTokenIndex())); 158 // [firstToken, lastToken] is a closed range. 159 // If empty, lastToken is omitted. 160 if (N->startTokenIndex() != End) 161 Out.attribute("lastToken", TokenID(End - 1)); 162 break; 163 } 164 auto Children = N->children(); 165 if (!Children.empty()) 166 Out.attributeArray("children", [&] { 167 for (unsigned I = 0; I < Children.size(); ++I) 168 Out.value(AssignID(Children[I], 169 I + 1 == Children.size() 170 ? End 171 : Children[I + 1]->startTokenIndex())); 172 }); 173 }); 174 } 175 }); 176 } 177 178 } // namespace 179 180 // We only accept the derived stream here. 181 // FIXME: allow the original stream instead? 182 void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G, 183 const ForestNode &Root, const TokenStream &Stream) { 184 Writer{OS, G, Root, Stream}.write(); 185 } 186 187 } // namespace pseudo 188 } // namespace clang 189