1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file implements a pass that transforms irreducible control flow into 12 /// reducible control flow. Irreducible control flow means multiple-entry 13 /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo 14 /// due to being unnatural. 15 /// 16 /// Note that LLVM has a generic pass that lowers irreducible control flow, but 17 /// it linearizes control flow, turning diamonds into two triangles, which is 18 /// both unnecessary and undesirable for WebAssembly. 19 /// 20 /// The big picture: Ignoring natural loops (seeing them monolithically), we 21 /// find all the blocks which can return to themselves ("loopers"). Loopers 22 /// reachable from the non-loopers are loop entries: if there are 2 or more, 23 /// then we have irreducible control flow. We fix that as follows: a new block 24 /// is created that can dispatch to each of the loop entries, based on the 25 /// value of a label "helper" variable, and we replace direct branches to the 26 /// entries with assignments to the label variable and a branch to the dispatch 27 /// block. Then the dispatch block is the single entry in a new natural loop. 28 /// 29 /// This is similar to what the Relooper [1] does, both identify looping code 30 /// that requires multiple entries, and resolve it in a similar way. In 31 /// Relooper terminology, we implement a Multiple shape in a Loop shape. Note 32 /// also that like the Relooper, we implement a "minimal" intervention: we only 33 /// use the "label" helper for the blocks we absolutely must and no others. We 34 /// also prioritize code size and do not perform node splitting (i.e. we don't 35 /// duplicate code in order to resolve irreducibility). 36 /// 37 /// The difference between this code and the Relooper is that the Relooper also 38 /// generates ifs and loops and works in a recursive manner, knowing at each 39 /// point what the entries are, and recursively breaks down the problem. Here 40 /// we just want to resolve irreducible control flow, and we also want to use 41 /// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to 42 /// identify natural loops, etc., and we start with the whole CFG and must 43 /// identify both the looping code and its entries. 44 /// 45 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In 46 /// Proceedings of the ACM international conference companion on Object oriented 47 /// programming systems languages and applications companion (SPLASH '11). ACM, 48 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 49 /// http://doi.acm.org/10.1145/2048147.2048224 50 /// 51 //===----------------------------------------------------------------------===// 52 53 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 54 #include "WebAssembly.h" 55 #include "WebAssemblyMachineFunctionInfo.h" 56 #include "WebAssemblySubtarget.h" 57 #include "llvm/ADT/PriorityQueue.h" 58 #include "llvm/ADT/SCCIterator.h" 59 #include "llvm/ADT/SetVector.h" 60 #include "llvm/CodeGen/MachineDominators.h" 61 #include "llvm/CodeGen/MachineFunction.h" 62 #include "llvm/CodeGen/MachineInstrBuilder.h" 63 #include "llvm/CodeGen/MachineLoopInfo.h" 64 #include "llvm/CodeGen/MachineRegisterInfo.h" 65 #include "llvm/CodeGen/Passes.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/raw_ostream.h" 68 using namespace llvm; 69 70 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" 71 72 namespace { 73 74 class LoopFixer { 75 public: 76 LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) 77 : MF(MF), MLI(MLI), Loop(Loop) {} 78 79 // Run the fixer on the given inputs. Returns whether changes were made. 80 bool run(); 81 82 private: 83 MachineFunction &MF; 84 MachineLoopInfo &MLI; 85 MachineLoop *Loop; 86 87 MachineBasicBlock *Header; 88 SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks; 89 90 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; 91 DenseMap<MachineBasicBlock *, BlockSet> Reachable; 92 93 // The worklist contains pairs of recent additions, (a, b), where we just 94 // added a link a => b. 95 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; 96 SmallVector<BlockPair, 4> WorkList; 97 98 // Get a canonical block to represent a block or a loop: the block, or if in 99 // an inner loop, the loop header, of it in an outer loop scope, we can 100 // ignore it. We need to call this on all blocks we work on. 101 MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) { 102 MachineLoop *InnerLoop = MLI.getLoopFor(MBB); 103 if (InnerLoop == Loop) { 104 return MBB; 105 } else { 106 // This is either in an outer or an inner loop, and not in ours. 107 if (!LoopBlocks.count(MBB)) { 108 // It's in outer code, ignore it. 109 return nullptr; 110 } 111 assert(InnerLoop); 112 // It's in an inner loop, canonicalize it to the header of that loop. 113 return InnerLoop->getHeader(); 114 } 115 } 116 117 // For a successor we can additionally ignore it if it's a branch back to a 118 // natural loop top, as when we are in the scope of a loop, we just care 119 // about internal irreducibility, and can ignore the loop we are in. We need 120 // to call this on all blocks in a context where they are a successor. 121 MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) { 122 if (Loop && MBB == Loop->getHeader()) { 123 // Ignore branches going to the loop's natural header. 124 return nullptr; 125 } 126 return canonicalize(MBB); 127 } 128 129 // Potentially insert a new reachable edge, and if so, note it as further 130 // work. 131 void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 132 assert(MBB == canonicalize(MBB)); 133 assert(Succ); 134 // Succ may not be interesting as a sucessor. 135 Succ = canonicalizeSuccessor(Succ); 136 if (!Succ) 137 return; 138 if (Reachable[MBB].insert(Succ).second) { 139 // For there to be further work, it means that we have 140 // X => MBB => Succ 141 // for some other X, and in that case X => Succ would be a new edge for 142 // us to discover later. However, if we don't care about MBB as a 143 // successor, then we don't care about that anyhow. 144 if (canonicalizeSuccessor(MBB)) { 145 WorkList.emplace_back(MBB, Succ); 146 } 147 } 148 } 149 }; 150 151 bool LoopFixer::run() { 152 Header = Loop ? Loop->getHeader() : &*MF.begin(); 153 154 // Identify all the blocks in this loop scope. 155 if (Loop) { 156 for (auto *MBB : Loop->getBlocks()) { 157 LoopBlocks.insert(MBB); 158 } 159 } else { 160 for (auto &MBB : MF) { 161 LoopBlocks.insert(&MBB); 162 } 163 } 164 165 // Compute which (canonicalized) blocks each block can reach. 166 167 // Add all the initial work. 168 for (auto *MBB : LoopBlocks) { 169 MachineLoop *InnerLoop = MLI.getLoopFor(MBB); 170 171 if (InnerLoop == Loop) { 172 for (auto *Succ : MBB->successors()) { 173 maybeInsert(MBB, Succ); 174 } 175 } else { 176 // It can't be in an outer loop - we loop on LoopBlocks - and so it must 177 // be an inner loop. 178 assert(InnerLoop); 179 // Check if we are the canonical block for this loop. 180 if (canonicalize(MBB) != MBB) { 181 continue; 182 } 183 // The successors are those of the loop. 184 SmallVector<MachineBasicBlock *, 2> ExitBlocks; 185 InnerLoop->getExitBlocks(ExitBlocks); 186 for (auto *Succ : ExitBlocks) { 187 maybeInsert(MBB, Succ); 188 } 189 } 190 } 191 192 // Do work until we are all done. 193 while (!WorkList.empty()) { 194 MachineBasicBlock *MBB; 195 MachineBasicBlock *Succ; 196 std::tie(MBB, Succ) = WorkList.pop_back_val(); 197 // The worklist item is an edge we just added, so it must have valid blocks 198 // (and not something canonicalized to nullptr). 199 assert(MBB); 200 assert(Succ); 201 // The successor in that pair must also be a valid successor. 202 assert(MBB == canonicalizeSuccessor(MBB)); 203 // We recently added MBB => Succ, and that means we may have enabled 204 // Pred => MBB => Succ. Check all the predecessors. Note that our loop here 205 // is correct for both a block and a block representing a loop, as the loop 206 // is natural and so the predecessors are all predecessors of the loop 207 // header, which is the block we have here. 208 for (auto *Pred : MBB->predecessors()) { 209 // Canonicalize, make sure it's relevant, and check it's not the same 210 // block (an update to the block itself doesn't help compute that same 211 // block). 212 Pred = canonicalize(Pred); 213 if (Pred && Pred != MBB) { 214 maybeInsert(Pred, Succ); 215 } 216 } 217 } 218 219 // It's now trivial to identify the loopers. 220 SmallPtrSet<MachineBasicBlock *, 4> Loopers; 221 for (auto MBB : LoopBlocks) { 222 if (Reachable[MBB].count(MBB)) { 223 Loopers.insert(MBB); 224 } 225 } 226 // The header cannot be a looper. At the toplevel, LLVM does not allow the 227 // entry to be in a loop, and in a natural loop we should ignore the header. 228 assert(Loopers.count(Header) == 0); 229 230 // Find the entries, loopers reachable from non-loopers. 231 SmallPtrSet<MachineBasicBlock *, 4> Entries; 232 SmallVector<MachineBasicBlock *, 4> SortedEntries; 233 for (auto *Looper : Loopers) { 234 for (auto *Pred : Looper->predecessors()) { 235 Pred = canonicalize(Pred); 236 if (Pred && !Loopers.count(Pred)) { 237 Entries.insert(Looper); 238 SortedEntries.push_back(Looper); 239 break; 240 } 241 } 242 } 243 244 // Check if we found irreducible control flow. 245 if (LLVM_LIKELY(Entries.size() <= 1)) 246 return false; 247 248 // Sort the entries to ensure a deterministic build. 249 llvm::sort(SortedEntries, 250 [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { 251 auto ANum = A->getNumber(); 252 auto BNum = B->getNumber(); 253 return ANum < BNum; 254 }); 255 256 #ifndef NDEBUG 257 for (auto Block : SortedEntries) 258 assert(Block->getNumber() != -1); 259 if (SortedEntries.size() > 1) { 260 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; 261 I != E; ++I) { 262 auto ANum = (*I)->getNumber(); 263 auto BNum = (*(std::next(I)))->getNumber(); 264 assert(ANum != BNum); 265 } 266 } 267 #endif 268 269 // Create a dispatch block which will contain a jump table to the entries. 270 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); 271 MF.insert(MF.end(), Dispatch); 272 MLI.changeLoopFor(Dispatch, Loop); 273 274 // Add the jump table. 275 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 276 MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), 277 TII.get(WebAssembly::BR_TABLE_I32)); 278 279 // Add the register which will be used to tell the jump table which block to 280 // jump to. 281 MachineRegisterInfo &MRI = MF.getRegInfo(); 282 unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 283 MIB.addReg(Reg); 284 285 // Compute the indices in the superheader, one for each bad block, and 286 // add them as successors. 287 DenseMap<MachineBasicBlock *, unsigned> Indices; 288 for (auto *MBB : SortedEntries) { 289 auto Pair = Indices.insert(std::make_pair(MBB, 0)); 290 if (!Pair.second) { 291 continue; 292 } 293 294 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; 295 Pair.first->second = Index; 296 297 MIB.addMBB(MBB); 298 Dispatch->addSuccessor(MBB); 299 } 300 301 // Rewrite the problematic successors for every block that wants to reach the 302 // bad blocks. For simplicity, we just introduce a new block for every edge 303 // we need to rewrite. (Fancier things are possible.) 304 305 SmallVector<MachineBasicBlock *, 4> AllPreds; 306 for (auto *MBB : SortedEntries) { 307 for (auto *Pred : MBB->predecessors()) { 308 if (Pred != Dispatch) { 309 AllPreds.push_back(Pred); 310 } 311 } 312 } 313 314 for (MachineBasicBlock *MBB : AllPreds) { 315 DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map; 316 for (auto *Succ : MBB->successors()) { 317 if (!Entries.count(Succ)) { 318 continue; 319 } 320 321 // This is a successor we need to rewrite. 322 MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); 323 MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ) 324 : MF.end(), 325 Split); 326 MLI.changeLoopFor(Split, Loop); 327 328 // Set the jump table's register of the index of the block we wish to 329 // jump to, and jump to the jump table. 330 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), 331 Reg) 332 .addImm(Indices[Succ]); 333 BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) 334 .addMBB(Dispatch); 335 Split->addSuccessor(Dispatch); 336 Map[Succ] = Split; 337 } 338 // Remap the terminator operands and the successor list. 339 for (MachineInstr &Term : MBB->terminators()) 340 for (auto &Op : Term.explicit_uses()) 341 if (Op.isMBB() && Indices.count(Op.getMBB())) 342 Op.setMBB(Map[Op.getMBB()]); 343 for (auto Rewrite : Map) 344 MBB->replaceSuccessor(Rewrite.first, Rewrite.second); 345 } 346 347 // Create a fake default label, because br_table requires one. 348 MIB.addMBB(MIB.getInstr() 349 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) 350 .getMBB()); 351 352 return true; 353 } 354 355 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { 356 StringRef getPassName() const override { 357 return "WebAssembly Fix Irreducible Control Flow"; 358 } 359 360 void getAnalysisUsage(AnalysisUsage &AU) const override { 361 AU.setPreservesCFG(); 362 AU.addRequired<MachineDominatorTree>(); 363 AU.addPreserved<MachineDominatorTree>(); 364 AU.addRequired<MachineLoopInfo>(); 365 AU.addPreserved<MachineLoopInfo>(); 366 MachineFunctionPass::getAnalysisUsage(AU); 367 } 368 369 bool runOnMachineFunction(MachineFunction &MF) override; 370 371 bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) { 372 // Visit the function body, which is identified as a null loop. 373 if (LoopFixer(MF, MLI, nullptr).run()) { 374 return true; 375 } 376 377 // Visit all the loops. 378 SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end()); 379 while (!Worklist.empty()) { 380 MachineLoop *Loop = Worklist.pop_back_val(); 381 Worklist.append(Loop->begin(), Loop->end()); 382 if (LoopFixer(MF, MLI, Loop).run()) { 383 return true; 384 } 385 } 386 387 return false; 388 } 389 390 public: 391 static char ID; // Pass identification, replacement for typeid 392 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} 393 }; 394 } // end anonymous namespace 395 396 char WebAssemblyFixIrreducibleControlFlow::ID = 0; 397 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, 398 "Removes irreducible control flow", false, false) 399 400 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { 401 return new WebAssemblyFixIrreducibleControlFlow(); 402 } 403 404 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( 405 MachineFunction &MF) { 406 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" 407 "********** Function: " 408 << MF.getName() << '\n'); 409 410 bool Changed = false; 411 auto &MLI = getAnalysis<MachineLoopInfo>(); 412 413 // When we modify something, bail out and recompute MLI, then start again, as 414 // we create a new natural loop when we resolve irreducible control flow, and 415 // other loops may become nested in it, etc. In practice this is not an issue 416 // because irreducible control flow is rare, only very few cycles are needed 417 // here. 418 while (LLVM_UNLIKELY(runIteration(MF, MLI))) { 419 // We rewrote part of the function; recompute MLI and start again. 420 LLVM_DEBUG(dbgs() << "Recomputing loops.\n"); 421 MF.getRegInfo().invalidateLiveness(); 422 MF.RenumberBlocks(); 423 getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF); 424 MLI.runOnMachineFunction(MF); 425 Changed = true; 426 } 427 428 return Changed; 429 } 430