//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that removes irreducible control flow.
/// Irreducible control flow means multiple-entry loops, which this pass
/// transforms to have a single entry.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// The big picture: We recursively process each "region", defined as a group
/// of blocks with a single entry and no branches back to that entry. A region
/// may be the entire function body, or the inner part of a loop, i.e., the
/// loop's body without branches back to the loop entry. In each region we fix
/// up multi-entry loops by adding a new block that can dispatch to each of the
/// loop entries, based on the value of a label "helper" variable, and we
/// replace direct branches to the entries with assignments to the label
/// variable and a branch to the dispatch block. Then the dispatch block is the
/// single entry in the loop containing the previous multiple entries. After
/// ensuring all the loops in a region are reducible, we recurse into them. The
/// total time complexity of this pass is:
///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
///     NumLoops * NumLoops)
///
/// This pass is similar to what the Relooper [1] does. Both identify looping
/// code that requires multiple entries, and resolve it in a similar way (in
/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
/// also prioritize code size and do not duplicate code in order to resolve
/// irreducibility. The graph algorithms for finding loops and entries and so
/// forth are also similar to the Relooper. The main differences between this
/// pass and the Relooper are:
///  * We just care about irreducibility, so we just look at loops.
///  * The Relooper emits structured control flow (with ifs etc.), while we
///    emit a CFG.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
/// programming systems languages and applications companion (SPLASH '11). ACM,
/// New York, NY, USA, 301-312.
DOI=10.1145/2048147.2048224 49 /// http://doi.acm.org/10.1145/2048147.2048224 50 /// 51 //===----------------------------------------------------------------------===// 52 53 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 54 #include "WebAssembly.h" 55 #include "WebAssemblyMachineFunctionInfo.h" 56 #include "WebAssemblySubtarget.h" 57 #include "llvm/ADT/PriorityQueue.h" 58 #include "llvm/ADT/SCCIterator.h" 59 #include "llvm/ADT/SetVector.h" 60 #include "llvm/CodeGen/MachineDominators.h" 61 #include "llvm/CodeGen/MachineFunction.h" 62 #include "llvm/CodeGen/MachineInstrBuilder.h" 63 #include "llvm/CodeGen/MachineLoopInfo.h" 64 #include "llvm/CodeGen/MachineRegisterInfo.h" 65 #include "llvm/CodeGen/Passes.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/raw_ostream.h" 68 using namespace llvm; 69 70 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" 71 72 namespace { 73 74 using BlockVector = SmallVector<MachineBasicBlock *, 4>; 75 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; 76 77 // Calculates reachability in a region. Ignores branches to blocks outside of 78 // the region, and ignores branches to the region entry (for the case where 79 // the region is the inner part of a loop). 80 class ReachabilityGraph { 81 public: 82 ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks) 83 : Entry(Entry), Blocks(Blocks) { 84 #ifndef NDEBUG 85 // The region must have a single entry. 86 for (auto *MBB : Blocks) { 87 if (MBB != Entry) { 88 for (auto *Pred : MBB->predecessors()) { 89 assert(inRegion(Pred)); 90 } 91 } 92 } 93 #endif 94 calculate(); 95 } 96 97 bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) { 98 assert(inRegion(From) && inRegion(To)); 99 return Reachable[From].count(To); 100 } 101 102 // "Loopers" are blocks that are in a loop. We detect these by finding blocks 103 // that can reach themselves. 104 const BlockSet &getLoopers() { return Loopers; } 105 106 // Get all blocks that are loop entries. 
107 const BlockSet &getLoopEntries() { return LoopEntries; } 108 109 // Get all blocks that enter a particular loop from outside. 110 const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) { 111 assert(inRegion(LoopEntry)); 112 return LoopEnterers[LoopEntry]; 113 } 114 115 private: 116 MachineBasicBlock *Entry; 117 const BlockSet &Blocks; 118 119 BlockSet Loopers, LoopEntries; 120 DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers; 121 122 bool inRegion(MachineBasicBlock *MBB) { return Blocks.count(MBB); } 123 124 // Maps a block to all the other blocks it can reach. 125 DenseMap<MachineBasicBlock *, BlockSet> Reachable; 126 127 void calculate() { 128 // Reachability computation work list. Contains pairs of recent additions 129 // (A, B) where we just added a link A => B. 130 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; 131 SmallVector<BlockPair, 4> WorkList; 132 133 // Add all relevant direct branches. 134 for (auto *MBB : Blocks) { 135 for (auto *Succ : MBB->successors()) { 136 if (Succ != Entry && inRegion(Succ)) { 137 Reachable[MBB].insert(Succ); 138 WorkList.emplace_back(MBB, Succ); 139 } 140 } 141 } 142 143 while (!WorkList.empty()) { 144 MachineBasicBlock *MBB, *Succ; 145 std::tie(MBB, Succ) = WorkList.pop_back_val(); 146 assert(inRegion(MBB) && Succ != Entry && inRegion(Succ)); 147 if (MBB != Entry) { 148 // We recently added MBB => Succ, and that means we may have enabled 149 // Pred => MBB => Succ. 150 for (auto *Pred : MBB->predecessors()) { 151 if (Reachable[Pred].insert(Succ).second) { 152 WorkList.emplace_back(Pred, Succ); 153 } 154 } 155 } 156 } 157 158 // Blocks that can return to themselves are in a loop. 159 for (auto *MBB : Blocks) { 160 if (canReach(MBB, MBB)) { 161 Loopers.insert(MBB); 162 } 163 } 164 assert(!Loopers.count(Entry)); 165 166 // Find the loop entries - loopers reachable from blocks not in that loop - 167 // and those outside blocks that reach them, the "loop enterers". 
168 for (auto *Looper : Loopers) { 169 for (auto *Pred : Looper->predecessors()) { 170 // Pred can reach Looper. If Looper can reach Pred, it is in the loop; 171 // otherwise, it is a block that enters into the loop. 172 if (!canReach(Looper, Pred)) { 173 LoopEntries.insert(Looper); 174 LoopEnterers[Looper].insert(Pred); 175 } 176 } 177 } 178 } 179 }; 180 181 // Finds the blocks in a single-entry loop, given the loop entry and the 182 // list of blocks that enter the loop. 183 class LoopBlocks { 184 public: 185 LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers) 186 : Entry(Entry), Enterers(Enterers) { 187 calculate(); 188 } 189 190 BlockSet &getBlocks() { return Blocks; } 191 192 private: 193 MachineBasicBlock *Entry; 194 const BlockSet &Enterers; 195 196 BlockSet Blocks; 197 198 void calculate() { 199 // Going backwards from the loop entry, if we ignore the blocks entering 200 // from outside, we will traverse all the blocks in the loop. 201 BlockVector WorkList; 202 BlockSet AddedToWorkList; 203 Blocks.insert(Entry); 204 for (auto *Pred : Entry->predecessors()) { 205 if (!Enterers.count(Pred)) { 206 WorkList.push_back(Pred); 207 AddedToWorkList.insert(Pred); 208 } 209 } 210 211 while (!WorkList.empty()) { 212 auto *MBB = WorkList.pop_back_val(); 213 assert(!Enterers.count(MBB)); 214 if (Blocks.insert(MBB).second) { 215 for (auto *Pred : MBB->predecessors()) { 216 if (!AddedToWorkList.count(Pred)) { 217 WorkList.push_back(Pred); 218 AddedToWorkList.insert(Pred); 219 } 220 } 221 } 222 } 223 } 224 }; 225 226 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { 227 StringRef getPassName() const override { 228 return "WebAssembly Fix Irreducible Control Flow"; 229 } 230 231 bool runOnMachineFunction(MachineFunction &MF) override; 232 233 bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks, 234 MachineFunction &MF); 235 236 void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks, 237 MachineFunction &MF); 238 239 
public: 240 static char ID; // Pass identification, replacement for typeid 241 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} 242 }; 243 244 bool WebAssemblyFixIrreducibleControlFlow::processRegion( 245 MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) { 246 bool Changed = false; 247 248 // Remove irreducibility before processing child loops, which may take 249 // multiple iterations. 250 while (true) { 251 ReachabilityGraph Graph(Entry, Blocks); 252 253 bool FoundIrreducibility = false; 254 255 for (auto *LoopEntry : Graph.getLoopEntries()) { 256 // Find mutual entries - all entries which can reach this one, and 257 // are reached by it (that always includes LoopEntry itself). All mutual 258 // entries must be in the same loop, so if we have more than one, then we 259 // have irreducible control flow. 260 // 261 // Note that irreducibility may involve inner loops, e.g. imagine A 262 // starts one loop, and it has B inside it which starts an inner loop. 263 // If we add a branch from all the way on the outside to B, then in a 264 // sense B is no longer an "inner" loop, semantically speaking. We will 265 // fix that irreducibility by adding a block that dispatches to either 266 // either A or B, so B will no longer be an inner loop in our output. 267 // (A fancier approach might try to keep it as such.) 268 // 269 // Note that we still need to recurse into inner loops later, to handle 270 // the case where the irreducibility is entirely nested - we would not 271 // be able to identify that at this point, since the enclosing loop is 272 // a group of blocks all of whom can reach each other. (We'll see the 273 // irreducibility after removing branches to the top of that enclosing 274 // loop.) 
275 BlockSet MutualLoopEntries; 276 MutualLoopEntries.insert(LoopEntry); 277 for (auto *OtherLoopEntry : Graph.getLoopEntries()) { 278 if (OtherLoopEntry != LoopEntry && 279 Graph.canReach(LoopEntry, OtherLoopEntry) && 280 Graph.canReach(OtherLoopEntry, LoopEntry)) { 281 MutualLoopEntries.insert(OtherLoopEntry); 282 } 283 } 284 285 if (MutualLoopEntries.size() > 1) { 286 makeSingleEntryLoop(MutualLoopEntries, Blocks, MF); 287 FoundIrreducibility = true; 288 Changed = true; 289 break; 290 } 291 } 292 // Only go on to actually process the inner loops when we are done 293 // removing irreducible control flow and changing the graph. Modifying 294 // the graph as we go is possible, and that might let us avoid looking at 295 // the already-fixed loops again if we are careful, but all that is 296 // complex and bug-prone. Since irreducible loops are rare, just starting 297 // another iteration is best. 298 if (FoundIrreducibility) { 299 continue; 300 } 301 302 for (auto *LoopEntry : Graph.getLoopEntries()) { 303 LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry)); 304 // Each of these calls to processRegion may change the graph, but are 305 // guaranteed not to interfere with each other. The only changes we make 306 // to the graph are to add blocks on the way to a loop entry. As the 307 // loops are disjoint, that means we may only alter branches that exit 308 // another loop, which are ignored when recursing into that other loop 309 // anyhow. 310 if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) { 311 Changed = true; 312 } 313 } 314 315 return Changed; 316 } 317 } 318 319 // Given a set of entries to a single loop, create a single entry for that 320 // loop by creating a dispatch block for them, routing control flow using 321 // a helper variable. Also updates Blocks with any new blocks created, so 322 // that we properly track all the blocks in the region. 
323 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( 324 BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) { 325 assert(Entries.size() >= 2); 326 327 // Sort the entries to ensure a deterministic build. 328 BlockVector SortedEntries(Entries.begin(), Entries.end()); 329 llvm::sort(SortedEntries, 330 [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { 331 auto ANum = A->getNumber(); 332 auto BNum = B->getNumber(); 333 return ANum < BNum; 334 }); 335 336 #ifndef NDEBUG 337 for (auto Block : SortedEntries) 338 assert(Block->getNumber() != -1); 339 if (SortedEntries.size() > 1) { 340 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E; 341 ++I) { 342 auto ANum = (*I)->getNumber(); 343 auto BNum = (*(std::next(I)))->getNumber(); 344 assert(ANum != BNum); 345 } 346 } 347 #endif 348 349 // Create a dispatch block which will contain a jump table to the entries. 350 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); 351 MF.insert(MF.end(), Dispatch); 352 Blocks.insert(Dispatch); 353 354 // Add the jump table. 355 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 356 MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), 357 TII.get(WebAssembly::BR_TABLE_I32)); 358 359 // Add the register which will be used to tell the jump table which block to 360 // jump to. 361 MachineRegisterInfo &MRI = MF.getRegInfo(); 362 unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 363 MIB.addReg(Reg); 364 365 // Compute the indices in the superheader, one for each bad block, and 366 // add them as successors. 
367 DenseMap<MachineBasicBlock *, unsigned> Indices; 368 for (auto *Entry : SortedEntries) { 369 auto Pair = Indices.insert(std::make_pair(Entry, 0)); 370 assert(Pair.second); 371 372 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; 373 Pair.first->second = Index; 374 375 MIB.addMBB(Entry); 376 Dispatch->addSuccessor(Entry); 377 } 378 379 // Rewrite the problematic successors for every block that wants to reach 380 // the bad blocks. For simplicity, we just introduce a new block for every 381 // edge we need to rewrite. (Fancier things are possible.) 382 383 BlockVector AllPreds; 384 for (auto *Entry : SortedEntries) { 385 for (auto *Pred : Entry->predecessors()) { 386 if (Pred != Dispatch) { 387 AllPreds.push_back(Pred); 388 } 389 } 390 } 391 392 for (MachineBasicBlock *Pred : AllPreds) { 393 DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map; 394 for (auto *Entry : Pred->successors()) { 395 if (!Entries.count(Entry)) { 396 continue; 397 } 398 399 // This is a successor we need to rewrite. 400 MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); 401 MF.insert(Pred->isLayoutSuccessor(Entry) 402 ? MachineFunction::iterator(Entry) 403 : MF.end(), 404 Split); 405 Blocks.insert(Split); 406 407 // Set the jump table's register of the index of the block we wish to 408 // jump to, and jump to the jump table. 409 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), 410 Reg) 411 .addImm(Indices[Entry]); 412 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) 413 .addMBB(Dispatch); 414 Split->addSuccessor(Dispatch); 415 Map[Entry] = Split; 416 } 417 // Remap the terminator operands and the successor list. 
418 for (MachineInstr &Term : Pred->terminators()) 419 for (auto &Op : Term.explicit_uses()) 420 if (Op.isMBB() && Indices.count(Op.getMBB())) 421 Op.setMBB(Map[Op.getMBB()]); 422 for (auto Rewrite : Map) 423 Pred->replaceSuccessor(Rewrite.first, Rewrite.second); 424 } 425 426 // Create a fake default label, because br_table requires one. 427 MIB.addMBB(MIB.getInstr() 428 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) 429 .getMBB()); 430 } 431 432 } // end anonymous namespace 433 434 char WebAssemblyFixIrreducibleControlFlow::ID = 0; 435 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, 436 "Removes irreducible control flow", false, false) 437 438 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { 439 return new WebAssemblyFixIrreducibleControlFlow(); 440 } 441 442 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( 443 MachineFunction &MF) { 444 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" 445 "********** Function: " 446 << MF.getName() << '\n'); 447 448 // Start the recursive process on the entire function body. 449 BlockSet AllBlocks; 450 for (auto &MBB : MF) { 451 AllBlocks.insert(&MBB); 452 } 453 454 if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { 455 // We rewrote part of the function; recompute relevant things. 456 MF.getRegInfo().invalidateLiveness(); 457 MF.RenumberBlocks(); 458 return true; 459 } 460 461 return false; 462 } 463