1 //===--- LRGraph.cpp - -------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang-pseudo/grammar/LRGraph.h"
10 #include "clang-pseudo/grammar/Grammar.h"
11 #include "llvm/ADT/DenseSet.h"
12 #include "llvm/ADT/Hashing.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Support/FormatVariadic.h"
16 #include "llvm/Support/raw_ostream.h"
17 
18 using ItemSet = std::vector<clang::pseudo::Item>;
19 
20 namespace llvm {
21 // Support clang::pseudo::Item as DenseMap keys.
22 template <> struct DenseMapInfo<ItemSet> {
23   static inline ItemSet getEmptyKey() {
24     return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()};
25   }
26   static inline ItemSet getTombstoneKey() {
27     return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()};
28   }
29   static unsigned getHashValue(const ItemSet &I) {
30     return llvm::hash_combine_range(I.begin(), I.end());
31   }
32   static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) {
33     return LHS == RHS;
34   }
35 };
36 } // namespace llvm
37 
38 namespace clang {
39 namespace pseudo {
40 namespace {
41 
42 struct SortByNextSymbol {
43   SortByNextSymbol(const Grammar &G) : G(G) {}
44   bool operator()(const Item &L, const Item &R) {
45     if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G))
46       return L.next(G) < R.next(G);
47     if (L.hasNext() != R.hasNext())
48       return L.hasNext() < R.hasNext(); //  a trailing dot is minimal.
49     return L < R;
50   }
51   const Grammar &G;
52 };
53 
54 // Computes a closure of the given item set S:
55 //  - extends the given S to contain all options for parsing next token;
56 //  - nonterminals after a dot are recursively expanded into the begin-state
57 //    of all production rules that produce that nonterminal;
58 //
59 // Given
60 //   Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ]
61 //   Input = [ E := . T ]
62 // returns [ E :=  . T, T := . n, T := . ( E ) ]
63 State closure(ItemSet Queue, const Grammar &G) {
64   llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()};
65   // We reuse the passed-by-value Queue as the final result, as it's already
66   // initialized to the right elements.
67   size_t ItIndex = 0;
68   while (ItIndex < Queue.size()) {
69     const Item &ExpandingItem = Queue[ItIndex];
70     ++ItIndex;
71     if (!ExpandingItem.hasNext())
72       continue;
73 
74     SymbolID NextSym = ExpandingItem.next(G);
75     if (pseudo::isToken(NextSym))
76       continue;
77     auto RRange = G.table().Nonterminals[NextSym].RuleRange;
78     for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
79       Item NewItem = Item::start(RID, G);
80       if (InQueue.insert(NewItem).second) // new
81         Queue.push_back(std::move(NewItem));
82     }
83   }
84   Queue.shrink_to_fit();
85   llvm::sort(Queue, SortByNextSymbol(G));
86   return {std::move(Queue)};
87 }
88 
89 // Returns all next (with a dot advanced) kernel item sets, partitioned by the
90 // advanced symbol.
91 //
92 // Given
93 //  S = [ E := . a b, E := E . - T ]
94 // returns [
95 //   {id(a), [ E := a . b ]},
96 //   {id(-), [ E := E - . T ]}
97 // ]
98 std::vector<std::pair<SymbolID, ItemSet>>
99 nextAvailableKernelItems(const State &S, const Grammar &G) {
100   std::vector<std::pair<SymbolID, ItemSet>> Results;
101   llvm::ArrayRef<Item> AllItems = S.Items;
102   AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); });
103   while (!AllItems.empty()) {
104     SymbolID AdvancedSymbol = AllItems.front().next(G);
105     auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) {
106       assert(I.hasNext());
107       return I.next(G) == AdvancedSymbol;
108     });
109     assert(!Batch.empty());
110     AllItems = AllItems.drop_front(Batch.size());
111 
112     // Advance a dot over the Symbol.
113     ItemSet Next;
114     for (const Item &I : Batch)
115       Next.push_back(I.advance());
116     // sort the set to keep order determinism for hash computation.
117     llvm::sort(Next);
118     Results.push_back({AdvancedSymbol, std::move(Next)});
119   }
120   return Results;
121 }
122 
123 } // namespace
124 
125 std::string Item::dump(const Grammar &G) const {
126   const auto &Rule = G.lookupRule(RID);
127   auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) {
128     std::vector<llvm::StringRef> Results;
129     for (auto SID : Syms)
130       Results.push_back(G.symbolName(SID));
131     return Results;
132   };
133   return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target),
134                        llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "),
135                        llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "))
136       .str();
137 }
138 
139 std::string State::dump(const Grammar &G, unsigned Indent) const {
140   std::string Result;
141   llvm::raw_string_ostream OS(Result);
142   for (const auto &Item : Items)
143     OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G));
144   return OS.str();
145 }
146 
147 std::string LRGraph::dumpForTests(const Grammar &G) const {
148   std::string Result;
149   llvm::raw_string_ostream OS(Result);
150   OS << "States:\n";
151   for (StateID ID = 0; ID < States.size(); ++ID) {
152     OS << llvm::formatv("State {0}\n", ID);
153     OS << States[ID].dump(G, /*Indent*/ 4);
154   }
155   for (const auto &E : Edges) {
156     OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label),
157                         E.Dst);
158   }
159   return OS.str();
160 }
161 
162 LRGraph LRGraph::buildLR0(const Grammar &G) {
163   class Builder {
164   public:
165     Builder(const Grammar &G) : G(G) {}
166 
167     // Adds a given state if not existed.
168     std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) {
169       assert(llvm::is_sorted(KernelItems) &&
170              "Item must be sorted before inserting to a hash map!");
171       auto It = StatesIndex.find(KernelItems);
172       if (It != StatesIndex.end())
173         return {It->second, false};
174       States.push_back(closure(KernelItems, G));
175       StateID NextStateID = States.size() - 1;
176       StatesIndex.insert({std::move(KernelItems), NextStateID});
177       return {NextStateID, true};
178     }
179 
180     void insertEdge(StateID Src, StateID Dst, SymbolID Label) {
181       Edges.push_back({Src, Dst, Label});
182     }
183 
184     // Returns a state with the given id.
185     const State &find(StateID ID) const {
186       assert(ID < States.size());
187       return States[ID];
188     }
189 
190     void addStartState(SymbolID Sym, StateID State) {
191       StartStates.push_back({Sym, State});
192     }
193 
194     LRGraph build() && {
195       States.shrink_to_fit();
196       Edges.shrink_to_fit();
197       llvm::sort(StartStates);
198       StartStates.shrink_to_fit();
199       return LRGraph(std::move(States), std::move(Edges),
200                      std::move(StartStates));
201     }
202 
203   private:
204     // Key is the **kernel** item sets.
205     llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex;
206     std::vector<State> States;
207     std::vector<Edge> Edges;
208     const Grammar &G;
209     std::vector<std::pair<SymbolID, StateID>> StartStates;
210   } Builder(G);
211 
212   std::vector<StateID> PendingStates;
213   // Initialize states with the start symbol.
214   auto RRange = G.table().Nonterminals[G.underscore()].RuleRange;
215   for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
216     auto StartState = std::vector<Item>{Item::start(RID, G)};
217     auto Result = Builder.insert(std::move(StartState));
218     assert(Result.second && "State must be new");
219     PendingStates.push_back(Result.first);
220 
221     const Rule &StartRule = G.lookupRule(RID);
222     assert(StartRule.Size == 1 &&
223            "Start rule must have exactly one symbol in its body!");
224     Builder.addStartState(StartRule.seq().front(), Result.first);
225   }
226 
227   while (!PendingStates.empty()) {
228     auto CurrentStateID = PendingStates.back();
229     PendingStates.pop_back();
230     for (auto Next :
231          nextAvailableKernelItems(Builder.find(CurrentStateID), G)) {
232       auto Insert = Builder.insert(Next.second);
233       if (Insert.second) // new state, insert to the pending queue.
234         PendingStates.push_back(Insert.first);
235       Builder.insertEdge(CurrentStateID, Insert.first, Next.first);
236     }
237   }
238   return std::move(Builder).build();
239 }
240 
241 } // namespace pseudo
242 } // namespace clang
243