1 //===--- LRGraph.cpp - -------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang-pseudo/grammar/LRGraph.h" 10 #include "clang-pseudo/grammar/Grammar.h" 11 #include "llvm/ADT/DenseSet.h" 12 #include "llvm/ADT/Hashing.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/StringExtras.h" 15 #include "llvm/Support/FormatVariadic.h" 16 #include "llvm/Support/raw_ostream.h" 17 18 using ItemSet = std::vector<clang::pseudo::Item>; 19 20 namespace llvm { 21 // Support clang::pseudo::Item as DenseMap keys. 22 template <> struct DenseMapInfo<ItemSet> { 23 static inline ItemSet getEmptyKey() { 24 return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()}; 25 } 26 static inline ItemSet getTombstoneKey() { 27 return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()}; 28 } 29 static unsigned getHashValue(const ItemSet &I) { 30 return llvm::hash_combine_range(I.begin(), I.end()); 31 } 32 static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) { 33 return LHS == RHS; 34 } 35 }; 36 } // namespace llvm 37 38 namespace clang { 39 namespace pseudo { 40 namespace { 41 42 struct SortByNextSymbol { 43 SortByNextSymbol(const Grammar &G) : G(G) {} 44 bool operator()(const Item &L, const Item &R) { 45 if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G)) 46 return L.next(G) < R.next(G); 47 if (L.hasNext() != R.hasNext()) 48 return L.hasNext() < R.hasNext(); // a trailing dot is minimal. 49 return L < R; 50 } 51 const Grammar &G; 52 }; 53 54 // Computes a closure of the given item set S: 55 // - extends the given S to contain all options for parsing next token; 56 // - nonterminals after a dot are recursively expanded into the begin-state 57 // of all production rules that produce that nonterminal; 58 // 59 // Given 60 // Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ] 61 // Input = [ E := . T ] 62 // returns [ E := . T, T := . n, T := . ( E ) ] 63 State closure(ItemSet Queue, const Grammar &G) { 64 llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()}; 65 // We reuse the passed-by-value Queue as the final result, as it's already 66 // initialized to the right elements. 67 size_t ItIndex = 0; 68 while (ItIndex < Queue.size()) { 69 const Item &ExpandingItem = Queue[ItIndex]; 70 ++ItIndex; 71 if (!ExpandingItem.hasNext()) 72 continue; 73 74 SymbolID NextSym = ExpandingItem.next(G); 75 if (pseudo::isToken(NextSym)) 76 continue; 77 auto RRange = G.table().Nonterminals[NextSym].RuleRange; 78 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { 79 Item NewItem = Item::start(RID, G); 80 if (InQueue.insert(NewItem).second) // new 81 Queue.push_back(std::move(NewItem)); 82 } 83 } 84 Queue.shrink_to_fit(); 85 llvm::sort(Queue, SortByNextSymbol(G)); 86 return {std::move(Queue)}; 87 } 88 89 // Returns all next (with a dot advanced) kernel item sets, partitioned by the 90 // advanced symbol. 91 // 92 // Given 93 // S = [ E := . a b, E := E . - T ] 94 // returns [ 95 // {id(a), [ E := a . b ]}, 96 // {id(-), [ E := E - . T ]} 97 // ] 98 std::vector<std::pair<SymbolID, ItemSet>> 99 nextAvailableKernelItems(const State &S, const Grammar &G) { 100 std::vector<std::pair<SymbolID, ItemSet>> Results; 101 llvm::ArrayRef<Item> AllItems = S.Items; 102 AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); }); 103 while (!AllItems.empty()) { 104 SymbolID AdvancedSymbol = AllItems.front().next(G); 105 auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) { 106 assert(I.hasNext()); 107 return I.next(G) == AdvancedSymbol; 108 }); 109 assert(!Batch.empty()); 110 AllItems = AllItems.drop_front(Batch.size()); 111 112 // Advance a dot over the Symbol. 113 ItemSet Next; 114 for (const Item &I : Batch) 115 Next.push_back(I.advance()); 116 // sort the set to keep order determinism for hash computation. 117 llvm::sort(Next); 118 Results.push_back({AdvancedSymbol, std::move(Next)}); 119 } 120 return Results; 121 } 122 123 } // namespace 124 125 std::string Item::dump(const Grammar &G) const { 126 const auto &Rule = G.lookupRule(RID); 127 auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) { 128 std::vector<llvm::StringRef> Results; 129 for (auto SID : Syms) 130 Results.push_back(G.symbolName(SID)); 131 return Results; 132 }; 133 return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target), 134 llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), 135 llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " ")) 136 .str(); 137 } 138 139 std::string State::dump(const Grammar &G, unsigned Indent) const { 140 std::string Result; 141 llvm::raw_string_ostream OS(Result); 142 for (const auto &Item : Items) 143 OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G)); 144 return OS.str(); 145 } 146 147 std::string LRGraph::dumpForTests(const Grammar &G) const { 148 std::string Result; 149 llvm::raw_string_ostream OS(Result); 150 OS << "States:\n"; 151 for (StateID ID = 0; ID < States.size(); ++ID) { 152 OS << llvm::formatv("State {0}\n", ID); 153 OS << States[ID].dump(G, /*Indent*/ 4); 154 } 155 for (const auto &E : Edges) { 156 OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label), 157 E.Dst); 158 } 159 return OS.str(); 160 } 161 162 LRGraph LRGraph::buildLR0(const Grammar &G) { 163 class Builder { 164 public: 165 Builder(const Grammar &G) : G(G) {} 166 167 // Adds a given state if not existed. 168 std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) { 169 assert(llvm::is_sorted(KernelItems) && 170 "Item must be sorted before inserting to a hash map!"); 171 auto It = StatesIndex.find(KernelItems); 172 if (It != StatesIndex.end()) 173 return {It->second, false}; 174 States.push_back(closure(KernelItems, G)); 175 StateID NextStateID = States.size() - 1; 176 StatesIndex.insert({std::move(KernelItems), NextStateID}); 177 return {NextStateID, true}; 178 } 179 180 void insertEdge(StateID Src, StateID Dst, SymbolID Label) { 181 Edges.push_back({Src, Dst, Label}); 182 } 183 184 // Returns a state with the given id. 185 const State &find(StateID ID) const { 186 assert(ID < States.size()); 187 return States[ID]; 188 } 189 190 void addStartState(SymbolID Sym, StateID State) { 191 StartStates.push_back({Sym, State}); 192 } 193 194 LRGraph build() && { 195 States.shrink_to_fit(); 196 Edges.shrink_to_fit(); 197 llvm::sort(StartStates); 198 StartStates.shrink_to_fit(); 199 return LRGraph(std::move(States), std::move(Edges), 200 std::move(StartStates)); 201 } 202 203 private: 204 // Key is the **kernel** item sets. 205 llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; 206 std::vector<State> States; 207 std::vector<Edge> Edges; 208 const Grammar &G; 209 std::vector<std::pair<SymbolID, StateID>> StartStates; 210 } Builder(G); 211 212 std::vector<StateID> PendingStates; 213 // Initialize states with the start symbol. 214 auto RRange = G.table().Nonterminals[G.underscore()].RuleRange; 215 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { 216 auto StartState = std::vector<Item>{Item::start(RID, G)}; 217 auto Result = Builder.insert(std::move(StartState)); 218 assert(Result.second && "State must be new"); 219 PendingStates.push_back(Result.first); 220 221 const Rule &StartRule = G.lookupRule(RID); 222 assert(StartRule.Size == 1 && 223 "Start rule must have exactly one symbol in its body!"); 224 Builder.addStartState(StartRule.seq().front(), Result.first); 225 } 226 227 while (!PendingStates.empty()) { 228 auto CurrentStateID = PendingStates.back(); 229 PendingStates.pop_back(); 230 for (auto Next : 231 nextAvailableKernelItems(Builder.find(CurrentStateID), G)) { 232 auto Insert = Builder.insert(Next.second); 233 if (Insert.second) // new state, insert to the pending queue. 234 PendingStates.push_back(Insert.first); 235 Builder.insertEdge(CurrentStateID, Insert.first, Next.first); 236 } 237 } 238 return std::move(Builder).build(); 239 } 240 241 } // namespace pseudo 242 } // namespace clang 243