1 //===--- LRGraph.cpp - -------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang-pseudo/grammar/LRGraph.h" 10 #include "clang-pseudo/grammar/Grammar.h" 11 #include "llvm/ADT/DenseSet.h" 12 #include "llvm/ADT/Hashing.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/StringExtras.h" 15 #include "llvm/Support/FormatVariadic.h" 16 #include "llvm/Support/raw_ostream.h" 17 18 using ItemSet = std::vector<clang::pseudo::Item>; 19 20 namespace llvm { 21 // Support clang::pseudo::Item as DenseMap keys. 22 template <> struct DenseMapInfo<ItemSet> { 23 static inline ItemSet getEmptyKey() { 24 return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()}; 25 } 26 static inline ItemSet getTombstoneKey() { 27 return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()}; 28 } 29 static unsigned getHashValue(const ItemSet &I) { 30 return llvm::hash_combine_range(I.begin(), I.end()); 31 } 32 static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) { 33 return LHS == RHS; 34 } 35 }; 36 } // namespace llvm 37 38 namespace clang { 39 namespace pseudo { 40 namespace { 41 42 struct SortByNextSymbol { 43 SortByNextSymbol(const Grammar &G) : G(G) {} 44 bool operator()(const Item &L, const Item &R) { 45 if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G)) 46 return L.next(G) < R.next(G); 47 if (L.hasNext() != R.hasNext()) 48 return L.hasNext() < R.hasNext(); // a trailing dot is minimal. 49 return L < R; 50 } 51 const Grammar &G; 52 }; 53 54 // Computes a closure of the given item set S: 55 // - extends the given S to contain all options for parsing next token; 56 // - nonterminals after a dot are recursively expanded into the begin-state 57 // of all production rules that produce that nonterminal; 58 // 59 // Given 60 // Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ] 61 // Input = [ E := . T ] 62 // returns [ E := . T, T := . n, T := . ( E ) ] 63 State closure(ItemSet Queue, const Grammar &G) { 64 llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()}; 65 // We reuse the passed-by-value Queue as the final result, as it's already 66 // initialized to the right elements. 67 size_t ItIndex = 0; 68 while (ItIndex < Queue.size()) { 69 const Item &ExpandingItem = Queue[ItIndex]; 70 ++ItIndex; 71 if (!ExpandingItem.hasNext()) 72 continue; 73 74 SymbolID NextSym = ExpandingItem.next(G); 75 if (pseudo::isToken(NextSym)) 76 continue; 77 auto RRange = G.table().Nonterminals[NextSym].RuleRange; 78 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { 79 Item NewItem = Item::start(RID, G); 80 if (InQueue.insert(NewItem).second) // new 81 Queue.push_back(std::move(NewItem)); 82 } 83 } 84 Queue.shrink_to_fit(); 85 llvm::sort(Queue, SortByNextSymbol(G)); 86 return {std::move(Queue)}; 87 } 88 89 // Returns all next (with a dot advanced) kernel item sets, partitioned by the 90 // advanced symbol. 91 // 92 // Given 93 // S = [ E := . a b, E := E . - T ] 94 // returns [ 95 // {id(a), [ E := a . b ]}, 96 // {id(-), [ E := E - . T ]} 97 // ] 98 std::vector<std::pair<SymbolID, ItemSet>> 99 nextAvailableKernelItems(const State &S, const Grammar &G) { 100 std::vector<std::pair<SymbolID, ItemSet>> Results; 101 llvm::ArrayRef<Item> AllItems = S.Items; 102 AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); }); 103 while (!AllItems.empty()) { 104 SymbolID AdvancedSymbol = AllItems.front().next(G); 105 auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) { 106 assert(I.hasNext()); 107 return I.next(G) == AdvancedSymbol; 108 }); 109 assert(!Batch.empty()); 110 AllItems = AllItems.drop_front(Batch.size()); 111 112 // Advance a dot over the Symbol. 113 ItemSet Next; 114 for (const Item &I : Batch) 115 Next.push_back(I.advance()); 116 // sort the set to keep order determinism for hash computation. 117 llvm::sort(Next); 118 Results.push_back({AdvancedSymbol, std::move(Next)}); 119 } 120 return Results; 121 } 122 123 std::vector<std::pair<ExtensionID, SymbolID>> 124 availableRecovery(const State &S, const Grammar &G) { 125 std::vector<std::pair<ExtensionID, SymbolID>> Result; 126 for (const Item &I : S.Items) { 127 const auto &Rule = G.lookupRule(I.rule()); 128 if (I.dot() != Rule.RecoveryIndex) 129 continue; 130 Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]}); 131 } 132 llvm::sort(Result); 133 Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); 134 return Result; 135 } 136 137 } // namespace 138 139 std::string Item::dump(const Grammar &G) const { 140 const auto &Rule = G.lookupRule(RID); 141 auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) { 142 std::vector<llvm::StringRef> Results; 143 for (auto SID : Syms) 144 Results.push_back(G.symbolName(SID)); 145 return Results; 146 }; 147 return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target), 148 llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), 149 llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "), 150 Rule.RecoveryIndex == DotPos ? " [recovery]" : "") 151 .str(); 152 } 153 154 std::string State::dump(const Grammar &G, unsigned Indent) const { 155 std::string Result; 156 llvm::raw_string_ostream OS(Result); 157 for (const auto &Item : Items) 158 OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G)); 159 return OS.str(); 160 } 161 162 std::string LRGraph::dumpForTests(const Grammar &G) const { 163 std::string Result; 164 llvm::raw_string_ostream OS(Result); 165 OS << "States:\n"; 166 for (StateID ID = 0; ID < States.size(); ++ID) { 167 OS << llvm::formatv("State {0}\n", ID); 168 OS << States[ID].dump(G, /*Indent*/ 4); 169 } 170 for (const auto &E : Edges) { 171 OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label), 172 E.Dst); 173 } 174 return OS.str(); 175 } 176 177 LRGraph LRGraph::buildLR0(const Grammar &G) { 178 class Builder { 179 public: 180 Builder(const Grammar &G) : G(G) {} 181 182 // Adds a given state if not existed. 183 std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) { 184 assert(llvm::is_sorted(KernelItems) && 185 "Item must be sorted before inserting to a hash map!"); 186 auto It = StatesIndex.find(KernelItems); 187 if (It != StatesIndex.end()) 188 return {It->second, false}; 189 States.push_back(closure(KernelItems, G)); 190 StateID NextStateID = States.size() - 1; 191 StatesIndex.insert({std::move(KernelItems), NextStateID}); 192 return {NextStateID, true}; 193 } 194 195 void insertEdge(StateID Src, StateID Dst, SymbolID Label) { 196 Edges.push_back({Src, Dst, Label}); 197 } 198 199 void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) { 200 Recoveries.push_back({Src, Strategy, Result}); 201 } 202 203 // Returns a state with the given id. 204 const State &find(StateID ID) const { 205 assert(ID < States.size()); 206 return States[ID]; 207 } 208 209 void addStartState(SymbolID Sym, StateID State) { 210 StartStates.push_back({Sym, State}); 211 } 212 213 LRGraph build() && { 214 States.shrink_to_fit(); 215 Edges.shrink_to_fit(); 216 Recoveries.shrink_to_fit(); 217 llvm::sort(StartStates); 218 StartStates.shrink_to_fit(); 219 return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries), 220 std::move(StartStates)); 221 } 222 223 private: 224 // Key is the **kernel** item sets. 225 llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; 226 std::vector<State> States; 227 std::vector<Edge> Edges; 228 std::vector<Recovery> Recoveries; 229 const Grammar &G; 230 std::vector<std::pair<SymbolID, StateID>> StartStates; 231 } Builder(G); 232 233 std::vector<StateID> PendingStates; 234 // Initialize states with the start symbol. 235 auto RRange = G.table().Nonterminals[G.underscore()].RuleRange; 236 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { 237 auto StartState = std::vector<Item>{Item::start(RID, G)}; 238 auto Result = Builder.insert(std::move(StartState)); 239 assert(Result.second && "State must be new"); 240 PendingStates.push_back(Result.first); 241 242 const Rule &StartRule = G.lookupRule(RID); 243 assert(StartRule.Size == 1 && 244 "Start rule must have exactly one symbol in its body!"); 245 Builder.addStartState(StartRule.seq().front(), Result.first); 246 } 247 248 while (!PendingStates.empty()) { 249 auto StateID = PendingStates.back(); 250 PendingStates.pop_back(); 251 for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) { 252 auto Insert = Builder.insert(Next.second); 253 if (Insert.second) // new state, insert to the pending queue. 254 PendingStates.push_back(Insert.first); 255 Builder.insertEdge(StateID, Insert.first, Next.first); 256 } 257 for (auto Recovery : availableRecovery(Builder.find(StateID), G)) 258 Builder.insertRecovery(StateID, Recovery.first, Recovery.second); 259 } 260 return std::move(Builder).build(); 261 } 262 263 } // namespace pseudo 264 } // namespace clang 265