1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a pass that transforms irreducible control flow into 11 /// reducible control flow. Irreducible control flow means multiple-entry 12 /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo 13 /// due to being unnatural. 14 /// 15 /// Note that LLVM has a generic pass that lowers irreducible control flow, but 16 /// it linearizes control flow, turning diamonds into two triangles, which is 17 /// both unnecessary and undesirable for WebAssembly. 18 /// 19 /// The big picture: Ignoring natural loops (seeing them monolithically), we 20 /// find all the blocks which can return to themselves ("loopers"). Loopers 21 /// reachable from the non-loopers are loop entries: if there are 2 or more, 22 /// then we have irreducible control flow. We fix that as follows: a new block 23 /// is created that can dispatch to each of the loop entries, based on the 24 /// value of a label "helper" variable, and we replace direct branches to the 25 /// entries with assignments to the label variable and a branch to the dispatch 26 /// block. Then the dispatch block is the single entry in a new natural loop. 27 /// 28 /// This is similar to what the Relooper [1] does, both identify looping code 29 /// that requires multiple entries, and resolve it in a similar way. In 30 /// Relooper terminology, we implement a Multiple shape in a Loop shape. Note 31 /// also that like the Relooper, we implement a "minimal" intervention: we only 32 /// use the "label" helper for the blocks we absolutely must and no others. We 33 /// also prioritize code size and do not perform node splitting (i.e. we don't 34 /// duplicate code in order to resolve irreducibility). 35 /// 36 /// The difference between this code and the Relooper is that the Relooper also 37 /// generates ifs and loops and works in a recursive manner, knowing at each 38 /// point what the entries are, and recursively breaks down the problem. Here 39 /// we just want to resolve irreducible control flow, and we also want to use 40 /// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to 41 /// identify natural loops, etc., and we start with the whole CFG and must 42 /// identify both the looping code and its entries. 43 /// 44 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In 45 /// Proceedings of the ACM international conference companion on Object oriented 46 /// programming systems languages and applications companion (SPLASH '11). ACM, 47 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 48 /// http://doi.acm.org/10.1145/2048147.2048224 49 /// 50 //===----------------------------------------------------------------------===// 51 52 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 53 #include "WebAssembly.h" 54 #include "WebAssemblyMachineFunctionInfo.h" 55 #include "WebAssemblySubtarget.h" 56 #include "llvm/ADT/PriorityQueue.h" 57 #include "llvm/ADT/SCCIterator.h" 58 #include "llvm/ADT/SetVector.h" 59 #include "llvm/CodeGen/MachineDominators.h" 60 #include "llvm/CodeGen/MachineFunction.h" 61 #include "llvm/CodeGen/MachineInstrBuilder.h" 62 #include "llvm/CodeGen/MachineLoopInfo.h" 63 #include "llvm/CodeGen/MachineRegisterInfo.h" 64 #include "llvm/CodeGen/Passes.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/raw_ostream.h" 67 using namespace llvm; 68 69 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" 70 71 namespace { 72 73 class LoopFixer { 74 public: 75 LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) 76 : MF(MF), MLI(MLI), Loop(Loop) {} 77 78 // Run the fixer on the given inputs. Returns whether changes were made. 79 bool run(); 80 81 private: 82 MachineFunction &MF; 83 MachineLoopInfo &MLI; 84 MachineLoop *Loop; 85 86 MachineBasicBlock *Header; 87 SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks; 88 89 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; 90 DenseMap<MachineBasicBlock *, BlockSet> Reachable; 91 92 // The worklist contains pairs of recent additions, (a, b), where we just 93 // added a link a => b. 94 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; 95 SmallVector<BlockPair, 4> WorkList; 96 97 // Get a canonical block to represent a block or a loop: the block, or if in 98 // an inner loop, the loop header, of it in an outer loop scope, we can 99 // ignore it. We need to call this on all blocks we work on. 100 MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) { 101 MachineLoop *InnerLoop = MLI.getLoopFor(MBB); 102 if (InnerLoop == Loop) { 103 return MBB; 104 } else { 105 // This is either in an outer or an inner loop, and not in ours. 106 if (!LoopBlocks.count(MBB)) { 107 // It's in outer code, ignore it. 108 return nullptr; 109 } 110 assert(InnerLoop); 111 // It's in an inner loop, canonicalize it to the header of that loop. 112 return InnerLoop->getHeader(); 113 } 114 } 115 116 // For a successor we can additionally ignore it if it's a branch back to a 117 // natural loop top, as when we are in the scope of a loop, we just care 118 // about internal irreducibility, and can ignore the loop we are in. We need 119 // to call this on all blocks in a context where they are a successor. 120 MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) { 121 if (Loop && MBB == Loop->getHeader()) { 122 // Ignore branches going to the loop's natural header. 123 return nullptr; 124 } 125 return canonicalize(MBB); 126 } 127 128 // Potentially insert a new reachable edge, and if so, note it as further 129 // work. 130 void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 131 assert(MBB == canonicalize(MBB)); 132 assert(Succ); 133 // Succ may not be interesting as a sucessor. 134 Succ = canonicalizeSuccessor(Succ); 135 if (!Succ) 136 return; 137 if (Reachable[MBB].insert(Succ).second) { 138 // For there to be further work, it means that we have 139 // X => MBB => Succ 140 // for some other X, and in that case X => Succ would be a new edge for 141 // us to discover later. However, if we don't care about MBB as a 142 // successor, then we don't care about that anyhow. 143 if (canonicalizeSuccessor(MBB)) { 144 WorkList.emplace_back(MBB, Succ); 145 } 146 } 147 } 148 }; 149 150 bool LoopFixer::run() { 151 Header = Loop ? Loop->getHeader() : &*MF.begin(); 152 153 // Identify all the blocks in this loop scope. 154 if (Loop) { 155 for (auto *MBB : Loop->getBlocks()) { 156 LoopBlocks.insert(MBB); 157 } 158 } else { 159 for (auto &MBB : MF) { 160 LoopBlocks.insert(&MBB); 161 } 162 } 163 164 // Compute which (canonicalized) blocks each block can reach. 165 166 // Add all the initial work. 167 for (auto *MBB : LoopBlocks) { 168 MachineLoop *InnerLoop = MLI.getLoopFor(MBB); 169 170 if (InnerLoop == Loop) { 171 for (auto *Succ : MBB->successors()) { 172 maybeInsert(MBB, Succ); 173 } 174 } else { 175 // It can't be in an outer loop - we loop on LoopBlocks - and so it must 176 // be an inner loop. 177 assert(InnerLoop); 178 // Check if we are the canonical block for this loop. 179 if (canonicalize(MBB) != MBB) { 180 continue; 181 } 182 // The successors are those of the loop. 183 SmallVector<MachineBasicBlock *, 2> ExitBlocks; 184 InnerLoop->getExitBlocks(ExitBlocks); 185 for (auto *Succ : ExitBlocks) { 186 maybeInsert(MBB, Succ); 187 } 188 } 189 } 190 191 // Do work until we are all done. 192 while (!WorkList.empty()) { 193 MachineBasicBlock *MBB; 194 MachineBasicBlock *Succ; 195 std::tie(MBB, Succ) = WorkList.pop_back_val(); 196 // The worklist item is an edge we just added, so it must have valid blocks 197 // (and not something canonicalized to nullptr). 198 assert(MBB); 199 assert(Succ); 200 // The successor in that pair must also be a valid successor. 201 assert(MBB == canonicalizeSuccessor(MBB)); 202 // We recently added MBB => Succ, and that means we may have enabled 203 // Pred => MBB => Succ. Check all the predecessors. Note that our loop here 204 // is correct for both a block and a block representing a loop, as the loop 205 // is natural and so the predecessors are all predecessors of the loop 206 // header, which is the block we have here. 207 for (auto *Pred : MBB->predecessors()) { 208 // Canonicalize, make sure it's relevant, and check it's not the same 209 // block (an update to the block itself doesn't help compute that same 210 // block). 211 Pred = canonicalize(Pred); 212 if (Pred && Pred != MBB) { 213 maybeInsert(Pred, Succ); 214 } 215 } 216 } 217 218 // It's now trivial to identify the loopers. 219 SmallPtrSet<MachineBasicBlock *, 4> Loopers; 220 for (auto MBB : LoopBlocks) { 221 if (Reachable[MBB].count(MBB)) { 222 Loopers.insert(MBB); 223 } 224 } 225 // The header cannot be a looper. At the toplevel, LLVM does not allow the 226 // entry to be in a loop, and in a natural loop we should ignore the header. 227 assert(Loopers.count(Header) == 0); 228 229 // Find the entries, loopers reachable from non-loopers. 230 SmallPtrSet<MachineBasicBlock *, 4> Entries; 231 SmallVector<MachineBasicBlock *, 4> SortedEntries; 232 for (auto *Looper : Loopers) { 233 for (auto *Pred : Looper->predecessors()) { 234 Pred = canonicalize(Pred); 235 if (Pred && !Loopers.count(Pred)) { 236 Entries.insert(Looper); 237 SortedEntries.push_back(Looper); 238 break; 239 } 240 } 241 } 242 243 // Check if we found irreducible control flow. 244 if (LLVM_LIKELY(Entries.size() <= 1)) 245 return false; 246 247 // Sort the entries to ensure a deterministic build. 248 llvm::sort(SortedEntries, 249 [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { 250 auto ANum = A->getNumber(); 251 auto BNum = B->getNumber(); 252 return ANum < BNum; 253 }); 254 255 #ifndef NDEBUG 256 for (auto Block : SortedEntries) 257 assert(Block->getNumber() != -1); 258 if (SortedEntries.size() > 1) { 259 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; 260 I != E; ++I) { 261 auto ANum = (*I)->getNumber(); 262 auto BNum = (*(std::next(I)))->getNumber(); 263 assert(ANum != BNum); 264 } 265 } 266 #endif 267 268 // Create a dispatch block which will contain a jump table to the entries. 269 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); 270 MF.insert(MF.end(), Dispatch); 271 MLI.changeLoopFor(Dispatch, Loop); 272 273 // Add the jump table. 274 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 275 MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), 276 TII.get(WebAssembly::BR_TABLE_I32)); 277 278 // Add the register which will be used to tell the jump table which block to 279 // jump to. 280 MachineRegisterInfo &MRI = MF.getRegInfo(); 281 unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 282 MIB.addReg(Reg); 283 284 // Compute the indices in the superheader, one for each bad block, and 285 // add them as successors. 286 DenseMap<MachineBasicBlock *, unsigned> Indices; 287 for (auto *MBB : SortedEntries) { 288 auto Pair = Indices.insert(std::make_pair(MBB, 0)); 289 if (!Pair.second) { 290 continue; 291 } 292 293 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; 294 Pair.first->second = Index; 295 296 MIB.addMBB(MBB); 297 Dispatch->addSuccessor(MBB); 298 } 299 300 // Rewrite the problematic successors for every block that wants to reach the 301 // bad blocks. For simplicity, we just introduce a new block for every edge 302 // we need to rewrite. (Fancier things are possible.) 303 304 SmallVector<MachineBasicBlock *, 4> AllPreds; 305 for (auto *MBB : SortedEntries) { 306 for (auto *Pred : MBB->predecessors()) { 307 if (Pred != Dispatch) { 308 AllPreds.push_back(Pred); 309 } 310 } 311 } 312 313 for (MachineBasicBlock *MBB : AllPreds) { 314 DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map; 315 for (auto *Succ : MBB->successors()) { 316 if (!Entries.count(Succ)) { 317 continue; 318 } 319 320 // This is a successor we need to rewrite. 321 MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); 322 MF.insert(MBB->isLayoutSuccessor(Succ) ? Succ->getIterator() : MF.end(), 323 Split); 324 MLI.changeLoopFor(Split, Loop); 325 326 // Set the jump table's register of the index of the block we wish to 327 // jump to, and jump to the jump table. 328 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), 329 Reg) 330 .addImm(Indices[Succ]); 331 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) 332 .addMBB(Dispatch); 333 Split->addSuccessor(Dispatch); 334 Map[Succ] = Split; 335 } 336 // Remap the terminator operands and the successor list. 337 for (MachineInstr &Term : MBB->terminators()) 338 for (auto &Op : Term.explicit_uses()) 339 if (Op.isMBB() && Indices.count(Op.getMBB())) 340 Op.setMBB(Map[Op.getMBB()]); 341 for (auto Rewrite : Map) 342 MBB->replaceSuccessor(Rewrite.first, Rewrite.second); 343 } 344 345 // Create a fake default label, because br_table requires one. 346 MIB.addMBB(MIB.getInstr() 347 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) 348 .getMBB()); 349 350 return true; 351 } 352 353 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { 354 StringRef getPassName() const override { 355 return "WebAssembly Fix Irreducible Control Flow"; 356 } 357 358 void getAnalysisUsage(AnalysisUsage &AU) const override { 359 AU.setPreservesCFG(); 360 AU.addRequired<MachineDominatorTree>(); 361 AU.addPreserved<MachineDominatorTree>(); 362 AU.addRequired<MachineLoopInfo>(); 363 AU.addPreserved<MachineLoopInfo>(); 364 MachineFunctionPass::getAnalysisUsage(AU); 365 } 366 367 bool runOnMachineFunction(MachineFunction &MF) override; 368 369 bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) { 370 // Visit the function body, which is identified as a null loop. 371 if (LoopFixer(MF, MLI, nullptr).run()) { 372 return true; 373 } 374 375 // Visit all the loops. 376 SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end()); 377 while (!Worklist.empty()) { 378 MachineLoop *Loop = Worklist.pop_back_val(); 379 Worklist.append(Loop->begin(), Loop->end()); 380 if (LoopFixer(MF, MLI, Loop).run()) { 381 return true; 382 } 383 } 384 385 return false; 386 } 387 388 public: 389 static char ID; // Pass identification, replacement for typeid 390 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} 391 }; 392 } // end anonymous namespace 393 394 char WebAssemblyFixIrreducibleControlFlow::ID = 0; 395 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, 396 "Removes irreducible control flow", false, false) 397 398 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { 399 return new WebAssemblyFixIrreducibleControlFlow(); 400 } 401 402 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( 403 MachineFunction &MF) { 404 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" 405 "********** Function: " 406 << MF.getName() << '\n'); 407 408 bool Changed = false; 409 auto &MLI = getAnalysis<MachineLoopInfo>(); 410 411 // When we modify something, bail out and recompute MLI, then start again, as 412 // we create a new natural loop when we resolve irreducible control flow, and 413 // other loops may become nested in it, etc. In practice this is not an issue 414 // because irreducible control flow is rare, only very few cycles are needed 415 // here. 416 while (LLVM_UNLIKELY(runIteration(MF, MLI))) { 417 // We rewrote part of the function; recompute MLI and start again. 418 LLVM_DEBUG(dbgs() << "Recomputing loops.\n"); 419 MF.getRegInfo().invalidateLiveness(); 420 MF.RenumberBlocks(); 421 getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF); 422 MLI.runOnMachineFunction(MF); 423 Changed = true; 424 } 425 426 return Changed; 427 } 428