//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that removes irreducible control flow.
/// Irreducible control flow means multiple-entry loops, which this pass
/// transforms to have a single entry.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// The big picture: We recursively process each "region", defined as a group
/// of blocks with a single entry and no branches back to that entry. A region
/// may be the entire function body, or the inner part of a loop, i.e., the
/// loop's body without branches back to the loop entry. In each region we fix
/// up multi-entry loops by adding a new block that can dispatch to each of the
/// loop entries, based on the value of a label "helper" variable, and we
/// replace direct branches to the entries with assignments to the label
/// variable and a branch to the dispatch block. Then the dispatch block is the
/// single entry in the loop containing the previous multiple entries. After
/// ensuring all the loops in a region are reducible, we recurse into them. The
/// total time complexity of this pass is:
///
///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
///     NumLoops * NumLoops)
///
/// This pass is similar to what the Relooper [1] does. Both identify looping
/// code that requires multiple entries, and resolve it in a similar way (in
/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
/// also prioritize code size and do not duplicate code in order to resolve
/// irreducibility. The graph algorithms for finding loops and entries and so
/// forth are also similar to the Relooper. The main differences between this
/// pass and the Relooper are:
///
///  * We just care about irreducibility, so we just look at loops.
///  * The Relooper emits structured control flow (with ifs etc.), while we
///    emit a CFG.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
/// programming systems languages and applications companion (SPLASH '11). ACM,
/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"

namespace {

using BlockVector = SmallVector<MachineBasicBlock *, 4>;
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;

// Calculates reachability in a region. Ignores branches to blocks outside of
// the region, and ignores branches to the region entry (for the case where
// the region is the inner part of a loop).
class ReachabilityGraph {
public:
  // Builds the reachability information for the region made up of Blocks,
  // whose single entry is Entry. Only a reference to Blocks is kept, so the
  // set must stay alive for as long as this object is used. Everything
  // (reachability, loopers, loop entries, loop enterers) is computed here,
  // once, by calculate(); later changes to the CFG are not reflected.
  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
      : Entry(Entry), Blocks(Blocks) {
#ifndef NDEBUG
    // The region must have a single entry.
    for (auto *MBB : Blocks) {
      if (MBB != Entry) {
        for (auto *Pred : MBB->predecessors()) {
          assert(inRegion(Pred));
        }
      }
    }
#endif
    calculate();
  }

  // Returns true if To can be reached from From by following one or more of
  // the branches recorded for this region. Note that a block therefore only
  // "reaches itself" when it lies on a cycle. Both blocks must be in the
  // region. A block with no recorded outgoing branches reaches nothing.
  bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
    assert(inRegion(From) && inRegion(To));
    auto I = Reachable.find(From);
    if (I == Reachable.end())
      return false;
    return I->second.count(To);
  }

  // "Loopers" are blocks that are in a loop. We detect these by finding blocks
  // that can reach themselves.
  const BlockSet &getLoopers() const { return Loopers; }

  // Get all blocks that are loop entries.
  const BlockSet &getLoopEntries() const { return LoopEntries; }

  // Get all blocks that enter a particular loop from outside.
  // LoopEntry must be one of the blocks returned by getLoopEntries(); this is
  // only checked by an assert.
  const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
    assert(inRegion(LoopEntry));
    auto I = LoopEnterers.find(LoopEntry);
    assert(I != LoopEnterers.end());
    return I->second;
  }

private:
  // The single entry block of the region.
  MachineBasicBlock *Entry;
  // All blocks of the region (held by reference, not copied).
  const BlockSet &Blocks;

  BlockSet Loopers, LoopEntries;
  // Maps each loop entry to the predecessors that enter the loop through it.
  DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;

  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }

  // Maps a block to all the other blocks it can reach.
  DenseMap<MachineBasicBlock *, BlockSet> Reachable;

  // Fills in Reachable, Loopers, LoopEntries and LoopEnterers.
  void calculate() {
    // Reachability computation work list. Contains pairs of recent additions
    // (A, B) where we just added a link A => B.
    using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
    SmallVector<BlockPair, 4> WorkList;

    // Add all relevant direct branches.
    for (auto *MBB : Blocks) {
      for (auto *Succ : MBB->successors()) {
        // Skip branches that leave the region and branches back to its entry.
        if (Succ != Entry && inRegion(Succ)) {
          Reachable[MBB].insert(Succ);
          WorkList.emplace_back(MBB, Succ);
        }
      }
    }

    // Propagate backwards until no new (block, reachable block) pair appears.
    while (!WorkList.empty()) {
      MachineBasicBlock *MBB, *Succ;
      std::tie(MBB, Succ) = WorkList.pop_back_val();
      assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
      // Branches into Entry are ignored, so nothing is propagated to the
      // predecessors of Entry.
      if (MBB != Entry) {
        // We recently added MBB => Succ, and that means we may have enabled
        // Pred => MBB => Succ.
        for (auto *Pred : MBB->predecessors()) {
          // Only queue the pair when it is new, which bounds the work.
          if (Reachable[Pred].insert(Succ).second) {
            WorkList.emplace_back(Pred, Succ);
          }
        }
      }
    }

    // Blocks that can return to themselves are in a loop.
    for (auto *MBB : Blocks) {
      if (canReach(MBB, MBB)) {
        Loopers.insert(MBB);
      }
    }
    // Nothing is recorded as reaching Entry, so Entry can never be a looper.
    assert(!Loopers.count(Entry));

    // Find the loop entries - loopers reachable from blocks not in that loop -
    // and those outside blocks that reach them, the "loop enterers".
    for (auto *Looper : Loopers) {
      for (auto *Pred : Looper->predecessors()) {
        // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
        // otherwise, it is a block that enters into the loop.
        if (!canReach(Looper, Pred)) {
          LoopEntries.insert(Looper);
          LoopEnterers[Looper].insert(Pred);
        }
      }
    }
  }
};

// Finds the blocks in a single-entry loop, given the loop entry and the
// list of blocks that enter the loop.
class LoopBlocks {
public:
  // Computes the block set immediately. Only a reference to Enterers is kept,
  // so that set must outlive this object.
  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
      : Entry(Entry), Enterers(Enterers) {
    calculate();
  }

  // Returns the computed loop blocks (always including Entry). The set is
  // handed out by mutable reference; processRegion passes it on as the block
  // set of the inner region, which may get new blocks inserted into it.
  BlockSet &getBlocks() { return Blocks; }

private:
  MachineBasicBlock *Entry;
  const BlockSet &Enterers;

  BlockSet Blocks;

  void calculate() {
    // Going backwards from the loop entry, if we ignore the blocks entering
    // from outside, we will traverse all the blocks in the loop.
    BlockVector WorkList;
    // Tracks everything ever queued, so that no block is queued twice.
    BlockSet AddedToWorkList;
    Blocks.insert(Entry);
    for (auto *Pred : Entry->predecessors()) {
      if (!Enterers.count(Pred)) {
        WorkList.push_back(Pred);
        AddedToWorkList.insert(Pred);
      }
    }

    while (!WorkList.empty()) {
      auto *MBB = WorkList.pop_back_val();
      // Enterers are filtered out only at Entry's own predecessors above;
      // further back they are expected never to show up, which is checked
      // here in builds with asserts enabled.
      assert(!Enterers.count(MBB));
      if (Blocks.insert(MBB).second) {
        for (auto *Pred : MBB->predecessors()) {
          if (!AddedToWorkList.count(Pred)) {
            WorkList.push_back(Pred);
            AddedToWorkList.insert(Pred);
          }
        }
      }
    }
  }
};

// The machine function pass itself. runOnMachineFunction starts the recursive
// region processing on the whole function; processRegion and
// makeSingleEntryLoop do the actual work.
class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
  StringRef getPassName() const override {
    return "WebAssembly Fix Irreducible Control Flow";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // Makes all loops in the region (Entry, Blocks) single-entry and then
  // recurses into each loop. Returns true if anything was changed.
  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
                     MachineFunction &MF);

  // Routes all branches to the given loop entries through one new dispatch
  // block. Newly created blocks are also inserted into Blocks.
  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
                           MachineFunction &MF);

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
};

// Processes one region: repeatedly looks for a group of loop entries that can
// all reach each other (a multi-entry loop) and rewrites it to be
// single-entry, recomputing the reachability graph after every rewrite. Once
// no such group is left, recurses into the inner part of every loop of the
// region. Returns true if any rewrite happened here or in a nested region.
bool WebAssemblyFixIrreducibleControlFlow::processRegion(
    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
  bool Changed = false;

  // Remove irreducibility before processing child loops, which may take
  // multiple iterations.
  while (true) {
    // Rebuilt from scratch on every iteration, since the previous iteration
    // may have added blocks and rerouted branches.
    ReachabilityGraph Graph(Entry, Blocks);

    bool FoundIrreducibility = false;

    for (auto *LoopEntry : Graph.getLoopEntries()) {
      // Find mutual entries - all entries which can reach this one, and
      // are reached by it (that always includes LoopEntry itself). All mutual
      // entries must be in the same loop, so if we have more than one, then we
      // have irreducible control flow.
      //
      // Note that irreducibility may involve inner loops, e.g. imagine A
      // starts one loop, and it has B inside it which starts an inner loop.
      // If we add a branch from all the way on the outside to B, then in a
      // sense B is no longer an "inner" loop, semantically speaking. We will
      // fix that irreducibility by adding a block that dispatches to
      // either A or B, so B will no longer be an inner loop in our output.
      // (A fancier approach might try to keep it as such.)
      //
      // Note that we still need to recurse into inner loops later, to handle
      // the case where the irreducibility is entirely nested - we would not
      // be able to identify that at this point, since the enclosing loop is
      // a group of blocks all of whom can reach each other. (We'll see the
      // irreducibility after removing branches to the top of that enclosing
      // loop.)
      BlockSet MutualLoopEntries;
      MutualLoopEntries.insert(LoopEntry);
      for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
        if (OtherLoopEntry != LoopEntry &&
            Graph.canReach(LoopEntry, OtherLoopEntry) &&
            Graph.canReach(OtherLoopEntry, LoopEntry)) {
          MutualLoopEntries.insert(OtherLoopEntry);
        }
      }

      if (MutualLoopEntries.size() > 1) {
        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF);
        FoundIrreducibility = true;
        Changed = true;
        // Graph is stale now; stop using it and start over.
        break;
      }
    }
    // Only go on to actually process the inner loops when we are done
    // removing irreducible control flow and changing the graph. Modifying
    // the graph as we go is possible, and that might let us avoid looking at
    // the already-fixed loops again if we are careful, but all that is
    // complex and bug-prone. Since irreducible loops are rare, just starting
    // another iteration is best.
    if (FoundIrreducibility) {
      continue;
    }

    for (auto *LoopEntry : Graph.getLoopEntries()) {
      LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
      // Each of these calls to processRegion may change the graph, but are
      // guaranteed not to interfere with each other. The only changes we make
      // to the graph are to add blocks on the way to a loop entry. As the
      // loops are disjoint, that means we may only alter branches that exit
      // another loop, which are ignored when recursing into that other loop
      // anyhow.
      if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
        Changed = true;
      }
    }

    return Changed;
  }
}

// Given a set of entries to a single loop, create a single entry for that
// loop by creating a dispatch block for them, routing control flow using
// a helper variable. Also updates Blocks with any new blocks created, so
// that we properly track all the blocks in the region.
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) {
  assert(Entries.size() >= 2);

  // Sort the entries to ensure a deterministic build.
  // (The order used is the block number of each entry.)
  BlockVector SortedEntries(Entries.begin(), Entries.end());
  llvm::sort(SortedEntries,
             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
               auto ANum = A->getNumber();
               auto BNum = B->getNumber();
               return ANum < BNum;
             });

#ifndef NDEBUG
  // Sanity checks for the sort key: every entry must have a block number, and
  // no two neighbours in the sorted order may share one.
  for (auto Block : SortedEntries)
    assert(Block->getNumber() != -1);
  if (SortedEntries.size() > 1) {
    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
         ++I) {
      auto ANum = (*I)->getNumber();
      auto BNum = (*(std::next(I)))->getNumber();
      assert(ANum != BNum);
    }
  }
#endif

  // Create a dispatch block which will contain a jump table to the entries.
  // It is appended at the end of the function.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  Blocks.insert(Dispatch);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB =
      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Compute the indices in the superheader, one for each bad block, and
  // add them as successors.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  for (auto *Entry : SortedEntries) {
    auto Pair = Indices.insert(std::make_pair(Entry, 0));
    assert(Pair.second);

    // The register was added as the first operand, so "explicit operands
    // minus one" is the number of targets added so far, i.e. the position
    // this entry is about to take in the table. This must be read before the
    // addMBB call below.
    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    Pair.first->second = Index;

    MIB.addMBB(Entry);
    Dispatch->addSuccessor(Entry);
  }

  // Rewrite the problematic successors for every block that wants to reach
  // the bad blocks. For simplicity, we just introduce a new block for every
  // edge we need to rewrite. (Fancier things are possible.)

  // Snapshot the predecessors first, because the rewriting below changes the
  // predecessor lists of the entries. A block that branches to several
  // entries is listed once per entry; its later visits find no entry among
  // its successors any more and so do nothing.
  BlockVector AllPreds;
  for (auto *Entry : SortedEntries) {
    for (auto *Pred : Entry->predecessors()) {
      if (Pred != Dispatch) {
        AllPreds.push_back(Pred);
      }
    }
  }

  for (MachineBasicBlock *Pred : AllPreds) {
    // Maps each entry that Pred branches to onto the new block replacing it.
    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
    for (auto *Entry : Pred->successors()) {
      if (!Entries.count(Entry)) {
        continue;
      }

      // This is a successor we need to rewrite.
      // If Entry directly follows Pred in layout, the new block is placed
      // between the two, so it directly follows Pred instead; otherwise it
      // is appended at the end of the function.
      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(Pred->isLayoutSuccessor(Entry)
                    ? MachineFunction::iterator(Entry)
                    : MF.end(),
                Split);
      Blocks.insert(Split);

      // Set the jump table's register of the index of the block we wish to
      // jump to, and jump to the jump table.
      BuildMI(Split, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
          .addImm(Indices[Entry]);
      BuildMI(Split, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
      Split->addSuccessor(Dispatch);
      Map[Entry] = Split;
    }
    // Remap the terminator operands and the successor list.
    // This is done only after the loop above, so that Pred's successor list
    // is not modified while it is being iterated.
    for (MachineInstr &Term : Pred->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[Op.getMBB()]);
    for (auto Rewrite : Map)
      Pred->replaceSuccessor(Rewrite.first, Rewrite.second);
  }

  // Create a fake default label, because br_table requires one.
  // The block of the last operand, i.e. the last entry added above, is simply
  // repeated.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());
}

} // end anonymous namespace

char WebAssemblyFixIrreducibleControlFlow::ID = 0;
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)

// Factory function for the pass; the caller receives a newly allocated
// instance.
FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
  return new WebAssemblyFixIrreducibleControlFlow();
}

// Pass entry point. Treats the whole function as one region, with the first
// block as its entry. Returns true if the function was modified.
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  // Start the recursive process on the entire function body.
  BlockSet AllBlocks;
  for (auto &MBB : MF) {
    AllBlocks.insert(&MBB);
  }

  if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
    // We rewrote part of the function; recompute relevant things.
    // New blocks and a new virtual register were introduced, so liveness is
    // invalidated and the blocks are renumbered.
    MF.getRegInfo().invalidateLiveness();
    MF.RenumberBlocks();
    return true;
  }

  return false;
}