1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a pass that removes irreducible control flow. 11 /// Irreducible control flow means multiple-entry loops, which this pass 12 /// transforms to have a single entry. 13 /// 14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but 15 /// it linearizes control flow, turning diamonds into two triangles, which is 16 /// both unnecessary and undesirable for WebAssembly. 17 /// 18 /// The big picture: We recursively process each "region", defined as a group 19 /// of blocks with a single entry and no branches back to that entry. A region 20 /// may be the entire function body, or the inner part of a loop, i.e., the 21 /// loop's body without branches back to the loop entry. In each region we fix 22 /// up multi-entry loops by adding a new block that can dispatch to each of the 23 /// loop entries, based on the value of a label "helper" variable, and we 24 /// replace direct branches to the entries with assignments to the label 25 /// variable and a branch to the dispatch block. Then the dispatch block is the 26 /// single entry in the loop containing the previous multiple entries. After 27 /// ensuring all the loops in a region are reducible, we recurse into them. The 28 /// total time complexity of this pass is: 29 /// 30 /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops + 31 /// NumLoops * NumLoops) 32 /// 33 /// This pass is similar to what the Relooper [1] does. Both identify looping 34 /// code that requires multiple entries, and resolve it in a similar way (in 35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note 36 /// also that like the Relooper, we implement a "minimal" intervention: we only 37 /// use the "label" helper for the blocks we absolutely must and no others. We 38 /// also prioritize code size and do not duplicate code in order to resolve 39 /// irreducibility. The graph algorithms for finding loops and entries and so 40 /// forth are also similar to the Relooper. The main differences between this 41 /// pass and the Relooper are: 42 /// 43 /// * We just care about irreducibility, so we just look at loops. 44 /// * The Relooper emits structured control flow (with ifs etc.), while we 45 /// emit a CFG. 46 /// 47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In 48 /// Proceedings of the ACM international conference companion on Object oriented 49 /// programming systems languages and applications companion (SPLASH '11). ACM, 50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 51 /// http://doi.acm.org/10.1145/2048147.2048224 52 /// 53 //===----------------------------------------------------------------------===// 54 55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 56 #include "WebAssembly.h" 57 #include "WebAssemblySubtarget.h" 58 #include "llvm/CodeGen/MachineInstrBuilder.h" 59 #include "llvm/Support/Debug.h" 60 using namespace llvm; 61 62 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" 63 64 namespace { 65 66 using BlockVector = SmallVector<MachineBasicBlock *, 4>; 67 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; 68 69 static BlockVector getSortedEntries(const BlockSet &Entries) { 70 BlockVector SortedEntries(Entries.begin(), Entries.end()); 71 llvm::sort(SortedEntries, 72 [](const MachineBasicBlock *A, const MachineBasicBlock *B) { 73 auto ANum = A->getNumber(); 74 auto BNum = B->getNumber(); 75 return ANum < BNum; 76 }); 77 return SortedEntries; 78 } 79 80 // Calculates reachability in a region. Ignores branches to blocks outside of 81 // the region, and ignores branches to the region entry (for the case where 82 // the region is the inner part of a loop). 83 class ReachabilityGraph { 84 public: 85 ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks) 86 : Entry(Entry), Blocks(Blocks) { 87 #ifndef NDEBUG 88 // The region must have a single entry. 89 for (auto *MBB : Blocks) { 90 if (MBB != Entry) { 91 for (auto *Pred : MBB->predecessors()) { 92 assert(inRegion(Pred)); 93 } 94 } 95 } 96 #endif 97 calculate(); 98 } 99 100 bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const { 101 assert(inRegion(From) && inRegion(To)); 102 auto I = Reachable.find(From); 103 if (I == Reachable.end()) 104 return false; 105 return I->second.count(To); 106 } 107 108 // "Loopers" are blocks that are in a loop. We detect these by finding blocks 109 // that can reach themselves. 110 const BlockSet &getLoopers() const { return Loopers; } 111 112 // Get all blocks that are loop entries. 113 const BlockSet &getLoopEntries() const { return LoopEntries; } 114 115 // Get all blocks that enter a particular loop from outside. 116 const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const { 117 assert(inRegion(LoopEntry)); 118 auto I = LoopEnterers.find(LoopEntry); 119 assert(I != LoopEnterers.end()); 120 return I->second; 121 } 122 123 private: 124 MachineBasicBlock *Entry; 125 const BlockSet &Blocks; 126 127 BlockSet Loopers, LoopEntries; 128 DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers; 129 130 bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); } 131 132 // Maps a block to all the other blocks it can reach. 133 DenseMap<MachineBasicBlock *, BlockSet> Reachable; 134 135 void calculate() { 136 // Reachability computation work list. Contains pairs of recent additions 137 // (A, B) where we just added a link A => B. 138 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; 139 SmallVector<BlockPair, 4> WorkList; 140 141 // Add all relevant direct branches. 142 for (auto *MBB : Blocks) { 143 for (auto *Succ : MBB->successors()) { 144 if (Succ != Entry && inRegion(Succ)) { 145 Reachable[MBB].insert(Succ); 146 WorkList.emplace_back(MBB, Succ); 147 } 148 } 149 } 150 151 while (!WorkList.empty()) { 152 MachineBasicBlock *MBB, *Succ; 153 std::tie(MBB, Succ) = WorkList.pop_back_val(); 154 assert(inRegion(MBB) && Succ != Entry && inRegion(Succ)); 155 if (MBB != Entry) { 156 // We recently added MBB => Succ, and that means we may have enabled 157 // Pred => MBB => Succ. 158 for (auto *Pred : MBB->predecessors()) { 159 if (Reachable[Pred].insert(Succ).second) { 160 WorkList.emplace_back(Pred, Succ); 161 } 162 } 163 } 164 } 165 166 // Blocks that can return to themselves are in a loop. 167 for (auto *MBB : Blocks) { 168 if (canReach(MBB, MBB)) { 169 Loopers.insert(MBB); 170 } 171 } 172 assert(!Loopers.count(Entry)); 173 174 // Find the loop entries - loopers reachable from blocks not in that loop - 175 // and those outside blocks that reach them, the "loop enterers". 176 for (auto *Looper : Loopers) { 177 for (auto *Pred : Looper->predecessors()) { 178 // Pred can reach Looper. If Looper can reach Pred, it is in the loop; 179 // otherwise, it is a block that enters into the loop. 180 if (!canReach(Looper, Pred)) { 181 LoopEntries.insert(Looper); 182 LoopEnterers[Looper].insert(Pred); 183 } 184 } 185 } 186 } 187 }; 188 189 // Finds the blocks in a single-entry loop, given the loop entry and the 190 // list of blocks that enter the loop. 191 class LoopBlocks { 192 public: 193 LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers) 194 : Entry(Entry), Enterers(Enterers) { 195 calculate(); 196 } 197 198 BlockSet &getBlocks() { return Blocks; } 199 200 private: 201 MachineBasicBlock *Entry; 202 const BlockSet &Enterers; 203 204 BlockSet Blocks; 205 206 void calculate() { 207 // Going backwards from the loop entry, if we ignore the blocks entering 208 // from outside, we will traverse all the blocks in the loop. 209 BlockVector WorkList; 210 BlockSet AddedToWorkList; 211 Blocks.insert(Entry); 212 for (auto *Pred : Entry->predecessors()) { 213 if (!Enterers.count(Pred)) { 214 WorkList.push_back(Pred); 215 AddedToWorkList.insert(Pred); 216 } 217 } 218 219 while (!WorkList.empty()) { 220 auto *MBB = WorkList.pop_back_val(); 221 assert(!Enterers.count(MBB)); 222 if (Blocks.insert(MBB).second) { 223 for (auto *Pred : MBB->predecessors()) { 224 if (!AddedToWorkList.count(Pred)) { 225 WorkList.push_back(Pred); 226 AddedToWorkList.insert(Pred); 227 } 228 } 229 } 230 } 231 } 232 }; 233 234 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { 235 StringRef getPassName() const override { 236 return "WebAssembly Fix Irreducible Control Flow"; 237 } 238 239 bool runOnMachineFunction(MachineFunction &MF) override; 240 241 bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks, 242 MachineFunction &MF); 243 244 void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks, 245 MachineFunction &MF, const ReachabilityGraph &Graph); 246 247 public: 248 static char ID; // Pass identification, replacement for typeid 249 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} 250 }; 251 252 bool WebAssemblyFixIrreducibleControlFlow::processRegion( 253 MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) { 254 bool Changed = false; 255 // Remove irreducibility before processing child loops, which may take 256 // multiple iterations. 257 while (true) { 258 ReachabilityGraph Graph(Entry, Blocks); 259 260 bool FoundIrreducibility = false; 261 262 for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) { 263 // Find mutual entries - all entries which can reach this one, and 264 // are reached by it (that always includes LoopEntry itself). All mutual 265 // entries must be in the same loop, so if we have more than one, then we 266 // have irreducible control flow. 267 // 268 // (Note that we need to sort the entries here, as otherwise the order can 269 // matter: being mutual is a symmetric relationship, and each set of 270 // mutuals will be handled properly no matter which we see first. However, 271 // there can be multiple disjoint sets of mutuals, and which we process 272 // first changes the output.) 273 // 274 // Note that irreducibility may involve inner loops, e.g. imagine A 275 // starts one loop, and it has B inside it which starts an inner loop. 276 // If we add a branch from all the way on the outside to B, then in a 277 // sense B is no longer an "inner" loop, semantically speaking. We will 278 // fix that irreducibility by adding a block that dispatches to either 279 // either A or B, so B will no longer be an inner loop in our output. 280 // (A fancier approach might try to keep it as such.) 281 // 282 // Note that we still need to recurse into inner loops later, to handle 283 // the case where the irreducibility is entirely nested - we would not 284 // be able to identify that at this point, since the enclosing loop is 285 // a group of blocks all of whom can reach each other. (We'll see the 286 // irreducibility after removing branches to the top of that enclosing 287 // loop.) 288 BlockSet MutualLoopEntries; 289 MutualLoopEntries.insert(LoopEntry); 290 for (auto *OtherLoopEntry : Graph.getLoopEntries()) { 291 if (OtherLoopEntry != LoopEntry && 292 Graph.canReach(LoopEntry, OtherLoopEntry) && 293 Graph.canReach(OtherLoopEntry, LoopEntry)) { 294 MutualLoopEntries.insert(OtherLoopEntry); 295 } 296 } 297 298 if (MutualLoopEntries.size() > 1) { 299 makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph); 300 FoundIrreducibility = true; 301 Changed = true; 302 break; 303 } 304 } 305 // Only go on to actually process the inner loops when we are done 306 // removing irreducible control flow and changing the graph. Modifying 307 // the graph as we go is possible, and that might let us avoid looking at 308 // the already-fixed loops again if we are careful, but all that is 309 // complex and bug-prone. Since irreducible loops are rare, just starting 310 // another iteration is best. 311 if (FoundIrreducibility) { 312 continue; 313 } 314 315 for (auto *LoopEntry : Graph.getLoopEntries()) { 316 LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry)); 317 // Each of these calls to processRegion may change the graph, but are 318 // guaranteed not to interfere with each other. The only changes we make 319 // to the graph are to add blocks on the way to a loop entry. As the 320 // loops are disjoint, that means we may only alter branches that exit 321 // another loop, which are ignored when recursing into that other loop 322 // anyhow. 323 if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) { 324 Changed = true; 325 } 326 } 327 328 return Changed; 329 } 330 } 331 332 // Given a set of entries to a single loop, create a single entry for that 333 // loop by creating a dispatch block for them, routing control flow using 334 // a helper variable. Also updates Blocks with any new blocks created, so 335 // that we properly track all the blocks in the region. But this does not update 336 // ReachabilityGraph; this will be updated in the caller of this function as 337 // needed. 338 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( 339 BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF, 340 const ReachabilityGraph &Graph) { 341 assert(Entries.size() >= 2); 342 343 // Sort the entries to ensure a deterministic build. 344 BlockVector SortedEntries = getSortedEntries(Entries); 345 346 #ifndef NDEBUG 347 for (auto Block : SortedEntries) 348 assert(Block->getNumber() != -1); 349 if (SortedEntries.size() > 1) { 350 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E; 351 ++I) { 352 auto ANum = (*I)->getNumber(); 353 auto BNum = (*(std::next(I)))->getNumber(); 354 assert(ANum != BNum); 355 } 356 } 357 #endif 358 359 // Create a dispatch block which will contain a jump table to the entries. 360 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); 361 MF.insert(MF.end(), Dispatch); 362 Blocks.insert(Dispatch); 363 364 // Add the jump table. 365 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 366 MachineInstrBuilder MIB = 367 BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32)); 368 369 // Add the register which will be used to tell the jump table which block to 370 // jump to. 371 MachineRegisterInfo &MRI = MF.getRegInfo(); 372 Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 373 MIB.addReg(Reg); 374 375 // Compute the indices in the superheader, one for each bad block, and 376 // add them as successors. 377 DenseMap<MachineBasicBlock *, unsigned> Indices; 378 for (auto *Entry : SortedEntries) { 379 auto Pair = Indices.insert(std::make_pair(Entry, 0)); 380 assert(Pair.second); 381 382 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; 383 Pair.first->second = Index; 384 385 MIB.addMBB(Entry); 386 Dispatch->addSuccessor(Entry); 387 } 388 389 // Rewrite the problematic successors for every block that wants to reach 390 // the bad blocks. For simplicity, we just introduce a new block for every 391 // edge we need to rewrite. (Fancier things are possible.) 392 393 BlockVector AllPreds; 394 for (auto *Entry : SortedEntries) { 395 for (auto *Pred : Entry->predecessors()) { 396 if (Pred != Dispatch) { 397 AllPreds.push_back(Pred); 398 } 399 } 400 } 401 402 // This set stores predecessors within this loop. 403 DenseSet<MachineBasicBlock *> InLoop; 404 for (auto *Pred : AllPreds) { 405 for (auto *Entry : Pred->successors()) { 406 if (!Entries.count(Entry)) 407 continue; 408 if (Graph.canReach(Entry, Pred)) { 409 InLoop.insert(Pred); 410 break; 411 } 412 } 413 } 414 415 // Record if each entry has a layout predecessor. This map stores 416 // <<Predecessor is within the loop?, loop entry>, layout predecessor> 417 std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> 418 EntryToLayoutPred; 419 for (auto *Pred : AllPreds) 420 for (auto *Entry : Pred->successors()) 421 if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry)) 422 EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred; 423 424 // We need to create at most two routing blocks per entry: one for 425 // predecessors outside the loop and one for predecessors inside the loop. 426 // This map stores 427 // <<Predecessor is within the loop?, loop entry>, routing block> 428 std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map; 429 for (auto *Pred : AllPreds) { 430 bool PredInLoop = InLoop.count(Pred); 431 for (auto *Entry : Pred->successors()) { 432 if (!Entries.count(Entry) || 433 Map.count(std::make_pair(InLoop.count(Pred), Entry))) 434 continue; 435 // If there exists a layout predecessor of this entry and this predecessor 436 // is not that, we rather create a routing block after that layout 437 // predecessor to save a branch. 438 if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) && 439 EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred) 440 continue; 441 442 // This is a successor we need to rewrite. 443 MachineBasicBlock *Routing = MF.CreateMachineBasicBlock(); 444 MF.insert(Pred->isLayoutSuccessor(Entry) 445 ? MachineFunction::iterator(Entry) 446 : MF.end(), 447 Routing); 448 Blocks.insert(Routing); 449 450 // Set the jump table's register of the index of the block we wish to 451 // jump to, and jump to the jump table. 452 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) 453 .addImm(Indices[Entry]); 454 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); 455 Routing->addSuccessor(Dispatch); 456 Map[std::make_pair(PredInLoop, Entry)] = Routing; 457 } 458 } 459 460 for (auto *Pred : AllPreds) { 461 bool PredInLoop = InLoop.count(Pred); 462 // Remap the terminator operands and the successor list. 463 for (MachineInstr &Term : Pred->terminators()) 464 for (auto &Op : Term.explicit_uses()) 465 if (Op.isMBB() && Indices.count(Op.getMBB())) 466 Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]); 467 468 for (auto *Succ : Pred->successors()) { 469 if (!Entries.count(Succ)) 470 continue; 471 auto *Routing = Map[std::make_pair(PredInLoop, Succ)]; 472 Pred->replaceSuccessor(Succ, Routing); 473 } 474 } 475 476 // Create a fake default label, because br_table requires one. 477 MIB.addMBB(MIB.getInstr() 478 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) 479 .getMBB()); 480 } 481 482 } // end anonymous namespace 483 484 char WebAssemblyFixIrreducibleControlFlow::ID = 0; 485 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, 486 "Removes irreducible control flow", false, false) 487 488 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { 489 return new WebAssemblyFixIrreducibleControlFlow(); 490 } 491 492 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( 493 MachineFunction &MF) { 494 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" 495 "********** Function: " 496 << MF.getName() << '\n'); 497 498 // Start the recursive process on the entire function body. 499 BlockSet AllBlocks; 500 for (auto &MBB : MF) { 501 AllBlocks.insert(&MBB); 502 } 503 504 if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { 505 // We rewrote part of the function; recompute relevant things. 506 MF.getRegInfo().invalidateLiveness(); 507 MF.RenumberBlocks(); 508 return true; 509 } 510 511 return false; 512 } 513