1 //===--- Forest.cpp - Parse forest  ------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang-pseudo/Forest.h"
10 #include "clang-pseudo/Token.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/None.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/Support/FormatVariadic.h"
15 
16 namespace clang {
17 namespace pseudo {
18 
operator ++()19 void ForestNode::RecursiveIterator::operator++() {
20   auto C = Cur->children();
21   // Try to find a child of the current node to descend into.
22   for (unsigned I = 0; I < C.size(); ++I) {
23     if (Seen.insert(C[I]).second) {
24       Stack.push_back({Cur, I});
25       Cur = C[I];
26       return;
27     }
28   }
29   // Try to find a sibling af an ancestor to advance to.
30   for (; !Stack.empty(); Stack.pop_back()) {
31     C = Stack.back().Parent->children();
32     unsigned &Index = Stack.back().ChildIndex;
33     while (++Index < C.size()) {
34       if (Seen.insert(C[Index]).second) {
35         Cur = C[Index];
36         return;
37       }
38     }
39   }
40   Cur = nullptr;
41 }
42 
43 llvm::iterator_range<ForestNode::RecursiveIterator>
descendants() const44 ForestNode::descendants() const {
45   return {RecursiveIterator(this), RecursiveIterator()};
46 }
47 
dump(const Grammar & G) const48 std::string ForestNode::dump(const Grammar &G) const {
49   switch (kind()) {
50   case Ambiguous:
51     return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
52   case Terminal:
53     return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
54                          startTokenIndex());
55   case Sequence:
56     return G.dumpRule(rule());
57   case Opaque:
58     return llvm::formatv("{0} := <opaque>", G.symbolName(symbol()));
59   }
60   llvm_unreachable("Unhandled node kind!");
61 }
62 
dumpRecursive(const Grammar & G,bool Abbreviated) const63 std::string ForestNode::dumpRecursive(const Grammar &G,
64                                       bool Abbreviated) const {
65   using llvm::formatv;
66   Token::Index MaxToken = 0;
67   // Count visits of nodes so we can mark those seen multiple times.
68   llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
69   std::function<void(const ForestNode *)> CountVisits =
70       [&](const ForestNode *P) {
71         MaxToken = std::max(MaxToken, P->startTokenIndex());
72         if (VisitCounts[P]++ > 0)
73           return; // Don't count children as multiply visited.
74         if (P->kind() == Ambiguous)
75           llvm::for_each(P->alternatives(), CountVisits);
76         else if (P->kind() == Sequence)
77           llvm::for_each(P->elements(), CountVisits);
78       };
79   CountVisits(this);
80 
81   unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size());
82   // e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
83   std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth);
84 
85   // The box-drawing characters that should be added as a child is rendered.
86   struct LineDecoration {
87     std::string Prefix;         // Prepended to every line.
88     llvm::StringRef First;      // added to the child's line.
89     llvm::StringRef Subsequent; // added to descendants' lines.
90   };
91 
92   // We print a "#<id>" for nonterminal forest nodes that are being dumped
93   // multiple times.
94   llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
95   std::string Result;
96   constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
97   std::function<void(const ForestNode *, Token::Index, llvm::Optional<SymbolID>,
98                      LineDecoration &LineDec)>
99       Dump = [&](const ForestNode *P, Token::Index End,
100                  llvm::Optional<SymbolID> ElidedParent,
101                  LineDecoration LineDec) {
102         bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
103         llvm::ArrayRef<const ForestNode *> Children;
104         auto EndOfElement = [&](size_t ChildIndex) {
105           return ChildIndex + 1 == Children.size()
106                      ? End
107                      : Children[ChildIndex + 1]->startTokenIndex();
108         };
109         if (P->kind() == Ambiguous) {
110           Children = P->alternatives();
111         } else if (P->kind() == Sequence) {
112           Children = P->elements();
113           if (Abbreviated) {
114             // Abbreviate chains of trivial sequence nodes.
115             //    A := B, B := C, C := D, D := X Y Z
116             // becomes
117             //    A~D := X Y Z
118             //
119             // We can't hide nodes that appear multiple times in the tree,
120             // because we need to call out their identity with IDs.
121             if (Children.size() == 1 && !SharedNode) {
122               assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
123                      EndOfElement(0) == End);
124               return Dump(Children[0], End,
125                           /*ElidedParent=*/ElidedParent.value_or(P->symbol()),
126                           LineDec);
127             }
128           }
129         }
130 
131         if (End == KEnd)
132           Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end");
133         else
134           Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End);
135         Result += LineDec.Prefix;
136         Result += LineDec.First;
137         if (ElidedParent) {
138           Result += G.symbolName(*ElidedParent);
139           Result += "~";
140         }
141 
142         if (SharedNode && P->kind() != ForestNode::Terminal) {
143           auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
144           bool First = It.second;
145           unsigned ID = It.first->second;
146 
147           // The first time, print as #1. Later, =#1.
148           if (First) {
149             Result += formatv("{0} #{1}", P->dump(G), ID);
150           } else {
151             Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
152             Children = {}; // Don't walk the children again.
153           }
154         } else {
155           Result.append(P->dump(G));
156         }
157         Result.push_back('\n');
158 
159         auto OldPrefixSize = LineDec.Prefix.size();
160         LineDec.Prefix += LineDec.Subsequent;
161         for (size_t I = 0; I < Children.size(); ++I) {
162           if (I == Children.size() - 1) {
163             LineDec.First = "└─";
164             LineDec.Subsequent = "  ";
165           } else {
166             LineDec.First = "├─";
167             LineDec.Subsequent = "│ ";
168           }
169           Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
170                llvm::None, LineDec);
171         }
172         LineDec.Prefix.resize(OldPrefixSize);
173       };
174   LineDecoration LineDec;
175   Dump(this, KEnd, llvm::None, LineDec);
176   return Result;
177 }
178 
179 llvm::ArrayRef<ForestNode>
createTerminals(const TokenStream & Code)180 ForestArena::createTerminals(const TokenStream &Code) {
181   ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size());
182   size_t Index = 0;
183   for (const auto &T : Code.tokens()) {
184     new (&Terminals[Index])
185         ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind),
186                    /*Start=*/Index, /*TerminalData*/ 0);
187     ++Index;
188   }
189   NodeCount = Index;
190   return llvm::makeArrayRef(Terminals, Index);
191 }
192 
193 } // namespace pseudo
194 } // namespace clang
195