1 //===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang-pseudo/Forest.h"
10 #include "clang-pseudo/Token.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/None.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/Support/FormatVariadic.h"
15
16 namespace clang {
17 namespace pseudo {
18
operator ++()19 void ForestNode::RecursiveIterator::operator++() {
20 auto C = Cur->children();
21 // Try to find a child of the current node to descend into.
22 for (unsigned I = 0; I < C.size(); ++I) {
23 if (Seen.insert(C[I]).second) {
24 Stack.push_back({Cur, I});
25 Cur = C[I];
26 return;
27 }
28 }
29 // Try to find a sibling af an ancestor to advance to.
30 for (; !Stack.empty(); Stack.pop_back()) {
31 C = Stack.back().Parent->children();
32 unsigned &Index = Stack.back().ChildIndex;
33 while (++Index < C.size()) {
34 if (Seen.insert(C[Index]).second) {
35 Cur = C[Index];
36 return;
37 }
38 }
39 }
40 Cur = nullptr;
41 }
42
43 llvm::iterator_range<ForestNode::RecursiveIterator>
descendants() const44 ForestNode::descendants() const {
45 return {RecursiveIterator(this), RecursiveIterator()};
46 }
47
dump(const Grammar & G) const48 std::string ForestNode::dump(const Grammar &G) const {
49 switch (kind()) {
50 case Ambiguous:
51 return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
52 case Terminal:
53 return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
54 startTokenIndex());
55 case Sequence:
56 return G.dumpRule(rule());
57 case Opaque:
58 return llvm::formatv("{0} := <opaque>", G.symbolName(symbol()));
59 }
60 llvm_unreachable("Unhandled node kind!");
61 }
62
dumpRecursive(const Grammar & G,bool Abbreviated) const63 std::string ForestNode::dumpRecursive(const Grammar &G,
64 bool Abbreviated) const {
65 using llvm::formatv;
66 Token::Index MaxToken = 0;
67 // Count visits of nodes so we can mark those seen multiple times.
68 llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
69 std::function<void(const ForestNode *)> CountVisits =
70 [&](const ForestNode *P) {
71 MaxToken = std::max(MaxToken, P->startTokenIndex());
72 if (VisitCounts[P]++ > 0)
73 return; // Don't count children as multiply visited.
74 if (P->kind() == Ambiguous)
75 llvm::for_each(P->alternatives(), CountVisits);
76 else if (P->kind() == Sequence)
77 llvm::for_each(P->elements(), CountVisits);
78 };
79 CountVisits(this);
80
81 unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size());
82 // e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
83 std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth);
84
85 // The box-drawing characters that should be added as a child is rendered.
86 struct LineDecoration {
87 std::string Prefix; // Prepended to every line.
88 llvm::StringRef First; // added to the child's line.
89 llvm::StringRef Subsequent; // added to descendants' lines.
90 };
91
92 // We print a "#<id>" for nonterminal forest nodes that are being dumped
93 // multiple times.
94 llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
95 std::string Result;
96 constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
97 std::function<void(const ForestNode *, Token::Index, llvm::Optional<SymbolID>,
98 LineDecoration &LineDec)>
99 Dump = [&](const ForestNode *P, Token::Index End,
100 llvm::Optional<SymbolID> ElidedParent,
101 LineDecoration LineDec) {
102 bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
103 llvm::ArrayRef<const ForestNode *> Children;
104 auto EndOfElement = [&](size_t ChildIndex) {
105 return ChildIndex + 1 == Children.size()
106 ? End
107 : Children[ChildIndex + 1]->startTokenIndex();
108 };
109 if (P->kind() == Ambiguous) {
110 Children = P->alternatives();
111 } else if (P->kind() == Sequence) {
112 Children = P->elements();
113 if (Abbreviated) {
114 // Abbreviate chains of trivial sequence nodes.
115 // A := B, B := C, C := D, D := X Y Z
116 // becomes
117 // A~D := X Y Z
118 //
119 // We can't hide nodes that appear multiple times in the tree,
120 // because we need to call out their identity with IDs.
121 if (Children.size() == 1 && !SharedNode) {
122 assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
123 EndOfElement(0) == End);
124 return Dump(Children[0], End,
125 /*ElidedParent=*/ElidedParent.value_or(P->symbol()),
126 LineDec);
127 }
128 }
129 }
130
131 if (End == KEnd)
132 Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end");
133 else
134 Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End);
135 Result += LineDec.Prefix;
136 Result += LineDec.First;
137 if (ElidedParent) {
138 Result += G.symbolName(*ElidedParent);
139 Result += "~";
140 }
141
142 if (SharedNode && P->kind() != ForestNode::Terminal) {
143 auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
144 bool First = It.second;
145 unsigned ID = It.first->second;
146
147 // The first time, print as #1. Later, =#1.
148 if (First) {
149 Result += formatv("{0} #{1}", P->dump(G), ID);
150 } else {
151 Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
152 Children = {}; // Don't walk the children again.
153 }
154 } else {
155 Result.append(P->dump(G));
156 }
157 Result.push_back('\n');
158
159 auto OldPrefixSize = LineDec.Prefix.size();
160 LineDec.Prefix += LineDec.Subsequent;
161 for (size_t I = 0; I < Children.size(); ++I) {
162 if (I == Children.size() - 1) {
163 LineDec.First = "└─";
164 LineDec.Subsequent = " ";
165 } else {
166 LineDec.First = "├─";
167 LineDec.Subsequent = "│ ";
168 }
169 Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
170 llvm::None, LineDec);
171 }
172 LineDec.Prefix.resize(OldPrefixSize);
173 };
174 LineDecoration LineDec;
175 Dump(this, KEnd, llvm::None, LineDec);
176 return Result;
177 }
178
179 llvm::ArrayRef<ForestNode>
createTerminals(const TokenStream & Code)180 ForestArena::createTerminals(const TokenStream &Code) {
181 ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size());
182 size_t Index = 0;
183 for (const auto &T : Code.tokens()) {
184 new (&Terminals[Index])
185 ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind),
186 /*Start=*/Index, /*TerminalData*/ 0);
187 ++Index;
188 }
189 NodeCount = Index;
190 return llvm::makeArrayRef(Terminals, Index);
191 }
192
193 } // namespace pseudo
194 } // namespace clang
195