1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Does various transformations for exception handling. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "WebAssembly.h" 17 #include "WebAssemblySubtarget.h" 18 #include "WebAssemblyUtilities.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/CodeGen/WasmEHFuncInfo.h" 21 #include "llvm/MC/MCAsmInfo.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "wasm-exception-prepare" 25 26 namespace { 27 class WebAssemblyLateEHPrepare final : public MachineFunctionPass { 28 StringRef getPassName() const override { 29 return "WebAssembly Prepare Exception"; 30 } 31 32 bool runOnMachineFunction(MachineFunction &MF) override; 33 34 bool removeUnnecessaryUnreachables(MachineFunction &MF); 35 bool replaceFuncletReturns(MachineFunction &MF); 36 bool hoistCatches(MachineFunction &MF); 37 bool addCatchAlls(MachineFunction &MF); 38 bool addRethrows(MachineFunction &MF); 39 bool ensureSingleBBTermPads(MachineFunction &MF); 40 bool mergeTerminatePads(MachineFunction &MF); 41 bool addCatchAllTerminatePads(MachineFunction &MF); 42 43 public: 44 static char ID; // Pass identification, replacement for typeid 45 WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {} 46 }; 47 } // end anonymous namespace 48 49 char WebAssemblyLateEHPrepare::ID = 0; 50 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, 51 "WebAssembly Late Exception Preparation", false, false) 52 53 FunctionPass *llvm::createWebAssemblyLateEHPrepare() { 54 return new WebAssemblyLateEHPrepare(); 55 } 56 57 // Returns the nearest EH pad that dominates this instruction. This does not use 58 // dominator analysis; it just does BFS on its predecessors until arriving at an 59 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all 60 // possible search paths should be the same. 61 // Returns nullptr in case it does not find any EH pad in the search, or finds 62 // multiple different EH pads. 63 static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) { 64 MachineFunction *MF = MI->getParent()->getParent(); 65 SmallVector<MachineBasicBlock *, 2> WL; 66 SmallPtrSet<MachineBasicBlock *, 2> Visited; 67 WL.push_back(MI->getParent()); 68 MachineBasicBlock *EHPad = nullptr; 69 while (!WL.empty()) { 70 MachineBasicBlock *MBB = WL.pop_back_val(); 71 if (Visited.count(MBB)) 72 continue; 73 Visited.insert(MBB); 74 if (MBB->isEHPad()) { 75 if (EHPad && EHPad != MBB) 76 return nullptr; 77 EHPad = MBB; 78 continue; 79 } 80 if (MBB == &MF->front()) 81 return nullptr; 82 WL.append(MBB->pred_begin(), MBB->pred_end()); 83 } 84 return EHPad; 85 } 86 87 // Erase the specified BBs if the BB does not have any remaining predecessors, 88 // and also all its dead children. 89 template <typename Container> 90 static void eraseDeadBBsAndChildren(const Container &MBBs) { 91 SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end()); 92 while (!WL.empty()) { 93 MachineBasicBlock *MBB = WL.pop_back_val(); 94 if (!MBB->pred_empty()) 95 continue; 96 SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(), 97 MBB->succ_end()); 98 WL.append(MBB->succ_begin(), MBB->succ_end()); 99 for (auto *Succ : Succs) 100 MBB->removeSuccessor(Succ); 101 MBB->eraseFromParent(); 102 } 103 } 104 105 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { 106 if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() != 107 ExceptionHandling::Wasm) 108 return false; 109 110 bool Changed = false; 111 Changed |= removeUnnecessaryUnreachables(MF); 112 Changed |= addRethrows(MF); 113 if (!MF.getFunction().hasPersonalityFn()) 114 return Changed; 115 Changed |= replaceFuncletReturns(MF); 116 Changed |= hoistCatches(MF); 117 Changed |= addCatchAlls(MF); 118 Changed |= ensureSingleBBTermPads(MF); 119 Changed |= mergeTerminatePads(MF); 120 Changed |= addCatchAllTerminatePads(MF); 121 return Changed; 122 } 123 124 bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( 125 MachineFunction &MF) { 126 bool Changed = false; 127 for (auto &MBB : MF) { 128 for (auto &MI : MBB) { 129 if (!WebAssembly::isThrow(MI)) 130 continue; 131 Changed = true; 132 133 // The instruction after the throw should be an unreachable or a branch to 134 // another BB that should eventually lead to an unreachable. Delete it 135 // because throw itself is a terminator, and also delete successors if 136 // any. 137 MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end()); 138 SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(), 139 MBB.succ_end()); 140 for (auto *Succ : Succs) 141 MBB.removeSuccessor(Succ); 142 eraseDeadBBsAndChildren(Succs); 143 } 144 } 145 146 return Changed; 147 } 148 149 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { 150 bool Changed = false; 151 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 152 auto *EHInfo = MF.getWasmEHFuncInfo(); 153 154 for (auto &MBB : MF) { 155 auto Pos = MBB.getFirstTerminator(); 156 if (Pos == MBB.end()) 157 continue; 158 MachineInstr *TI = &*Pos; 159 160 switch (TI->getOpcode()) { 161 case WebAssembly::CATCHRET: { 162 // Replace a catchret with a branch 163 MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); 164 if (!MBB.isLayoutSuccessor(TBB)) 165 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) 166 .addMBB(TBB); 167 TI->eraseFromParent(); 168 Changed = true; 169 break; 170 } 171 case WebAssembly::CLEANUPRET: { 172 // Replace a cleanupret with a rethrow 173 if (EHInfo->hasThrowUnwindDest(&MBB)) 174 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) 175 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 176 else 177 BuildMI(MBB, TI, TI->getDebugLoc(), 178 TII.get(WebAssembly::RETHROW_TO_CALLER)); 179 180 TI->eraseFromParent(); 181 Changed = true; 182 break; 183 } 184 } 185 } 186 return Changed; 187 } 188 189 // Hoist catch instructions to the beginning of their matching EH pad BBs in 190 // case, 191 // (1) catch instruction is not the first instruction in EH pad. 192 // ehpad: 193 // some_other_instruction 194 // ... 195 // %exn = catch 0 196 // (2) catch instruction is in a non-EH pad BB. For example, 197 // ehpad: 198 // br bb0 199 // bb0: 200 // %exn = catch 0 201 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { 202 bool Changed = false; 203 SmallVector<MachineInstr *, 16> Catches; 204 for (auto &MBB : MF) 205 for (auto &MI : MBB) 206 if (WebAssembly::isCatch(MI)) 207 Catches.push_back(&MI); 208 209 for (auto *Catch : Catches) { 210 MachineBasicBlock *EHPad = getMatchingEHPad(Catch); 211 assert(EHPad && "No matching EH pad for catch"); 212 if (EHPad->begin() == Catch) 213 continue; 214 Changed = true; 215 EHPad->insert(EHPad->begin(), Catch->removeFromParent()); 216 } 217 return Changed; 218 } 219 220 // Add catch_all to beginning of cleanup pads. 221 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { 222 bool Changed = false; 223 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 224 225 for (auto &MBB : MF) { 226 if (!MBB.isEHPad()) 227 continue; 228 // This runs after hoistCatches(), so we assume that if there is a catch, 229 // that should be the first instruction in an EH pad. 230 if (!WebAssembly::isCatch(*MBB.begin())) { 231 Changed = true; 232 BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(), 233 TII.get(WebAssembly::CATCH_ALL)); 234 } 235 } 236 return Changed; 237 } 238 239 // Add a 'rethrow' instruction after __cxa_rethrow() call 240 bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) { 241 bool Changed = false; 242 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 243 auto *EHInfo = MF.getWasmEHFuncInfo(); 244 245 for (auto &MBB : MF) 246 for (auto &MI : MBB) { 247 // Check if it is a call to __cxa_rethrow() 248 if (!MI.isCall()) 249 continue; 250 MachineOperand &CalleeOp = MI.getOperand(0); 251 if (!CalleeOp.isGlobal() || 252 CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn) 253 continue; 254 255 // Now we have __cxa_rethrow() call 256 Changed = true; 257 auto InsertPt = std::next(MachineBasicBlock::iterator(MI)); 258 while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs 259 ++InsertPt; 260 MachineInstr *Rethrow = nullptr; 261 if (EHInfo->hasThrowUnwindDest(&MBB)) 262 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 263 TII.get(WebAssembly::RETHROW)) 264 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 265 else 266 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 267 TII.get(WebAssembly::RETHROW_TO_CALLER)); 268 269 // Because __cxa_rethrow does not return, the instruction after the 270 // rethrow should be an unreachable or a branch to another BB that should 271 // eventually lead to an unreachable. Delete it because rethrow itself is 272 // a terminator, and also delete non-EH pad successors if any. 273 MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end()); 274 SmallVector<MachineBasicBlock *, 8> NonPadSuccessors; 275 for (auto *Succ : MBB.successors()) 276 if (!Succ->isEHPad()) 277 NonPadSuccessors.push_back(Succ); 278 for (auto *Succ : NonPadSuccessors) 279 MBB.removeSuccessor(Succ); 280 eraseDeadBBsAndChildren(NonPadSuccessors); 281 } 282 return Changed; 283 } 284 285 // Terminate pads are an single-BB EH pad in the form of 286 // termpad: 287 // %exn = catch 0 288 // call @__clang_call_terminate(%exn) 289 // unreachable 290 // (There can be set_local and get_locals before the call if we didn't run 291 // RegStackify) 292 // But code transformations can change or add more control flow, so the call to 293 // __clang_call_terminate() function may not be in the original EH pad anymore. 294 // This ensures every terminate pad is a single BB in the form illustrated 295 // above. 296 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) { 297 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 298 299 // Find calls to __clang_call_terminate() 300 SmallVector<MachineInstr *, 8> ClangCallTerminateCalls; 301 for (auto &MBB : MF) 302 for (auto &MI : MBB) 303 if (MI.isCall()) { 304 const MachineOperand &CalleeOp = MI.getOperand(0); 305 if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == 306 WebAssembly::ClangCallTerminateFn) 307 ClangCallTerminateCalls.push_back(&MI); 308 } 309 310 bool Changed = false; 311 for (auto *Call : ClangCallTerminateCalls) { 312 MachineBasicBlock *EHPad = getMatchingEHPad(Call); 313 assert(EHPad && "No matching EH pad for catch"); 314 315 // If it is already the form we want, skip it 316 if (Call->getParent() == EHPad && 317 Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE) 318 continue; 319 320 // In case the __clang_call_terminate() call is not in its matching EH pad, 321 // move the call to the end of EH pad and add an unreachable instruction 322 // after that. Delete all successors and their children if any, because here 323 // the program terminates. 324 Changed = true; 325 MachineInstr *Catch = &*EHPad->begin(); 326 // This runs after hoistCatches(), so catch instruction should be at the top 327 assert(WebAssembly::isCatch(*Catch)); 328 // Takes the result register of the catch instruction as argument. There may 329 // have been some other set_local/get_locals in between, but at this point 330 // we don't care. 331 Call->getOperand(1).setReg(Catch->getOperand(0).getReg()); 332 auto InsertPos = std::next(MachineBasicBlock::iterator(Catch)); 333 EHPad->insert(InsertPos, Call->removeFromParent()); 334 BuildMI(*EHPad, InsertPos, Call->getDebugLoc(), 335 TII.get(WebAssembly::UNREACHABLE)); 336 EHPad->erase(InsertPos, EHPad->end()); 337 SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(), 338 EHPad->succ_end()); 339 for (auto *Succ : Succs) 340 EHPad->removeSuccessor(Succ); 341 eraseDeadBBsAndChildren(Succs); 342 } 343 return Changed; 344 } 345 346 // In case there are multiple terminate pads, merge them into one for code size. 347 // This runs after ensureSingleBBTermPads() and assumes every terminate pad is a 348 // single BB. 349 // In principle this violates EH scope relationship because it can merge 350 // multiple inner EH scopes, each of which is in different outer EH scope. But 351 // getEHScopeMembership() function will not be called after this, so it is fine. 352 bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) { 353 SmallVector<MachineBasicBlock *, 8> TermPads; 354 for (auto &MBB : MF) 355 if (WebAssembly::isCatchTerminatePad(MBB)) 356 TermPads.push_back(&MBB); 357 if (TermPads.empty()) 358 return false; 359 360 MachineBasicBlock *UniqueTermPad = TermPads.front(); 361 for (auto *TermPad : 362 llvm::make_range(std::next(TermPads.begin()), TermPads.end())) { 363 SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(), 364 TermPad->pred_end()); 365 for (auto *Pred : Preds) 366 Pred->replaceSuccessor(TermPad, UniqueTermPad); 367 TermPad->eraseFromParent(); 368 } 369 return true; 370 } 371 372 // Terminate pads are cleanup pads, so they should start with a 'catch_all' 373 // instruction. But in the Itanium model, when we have a C++ exception object, 374 // we pass them to __clang_call_terminate function, which calls __cxa_end_catch 375 // with the passed exception pointer and then std::terminate. This is the reason 376 // that terminate pads are generated with not a catch_all but a catch 377 // instruction in clang and earlier llvm passes. Here we append a terminate pad 378 // with a catch_all after each existing terminate pad so we can also catch 379 // foreign exceptions. For every terminate pad: 380 // %exn = catch 0 381 // call @__clang_call_terminate(%exn) 382 // unreachable 383 // We append this BB right after that: 384 // catch_all 385 // call @std::terminate() 386 // unreachable 387 bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) { 388 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 389 SmallVector<MachineBasicBlock *, 8> TermPads; 390 for (auto &MBB : MF) 391 if (WebAssembly::isCatchTerminatePad(MBB)) 392 TermPads.push_back(&MBB); 393 if (TermPads.empty()) 394 return false; 395 396 Function *StdTerminateFn = 397 MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn); 398 assert(StdTerminateFn && "There is no std::terminate() function"); 399 for (auto *CatchTermPad : TermPads) { 400 DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin()); 401 auto *CatchAllTermPad = MF.CreateMachineBasicBlock(); 402 MF.insert(std::next(MachineFunction::iterator(CatchTermPad)), 403 CatchAllTermPad); 404 CatchAllTermPad->setIsEHPad(); 405 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL)); 406 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID)) 407 .addGlobalAddress(StdTerminateFn); 408 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE)); 409 410 // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not 411 // a successor of an existing terminate pad. CatchAllTermPad should have all 412 // predecessors CatchTermPad has instead. This is a hack to force 413 // CatchAllTermPad be always sorted right after CatchTermPad; the correct 414 // predecessor-successor relationships will be restored in CFGStackify pass. 415 CatchTermPad->addSuccessor(CatchAllTermPad); 416 } 417 return true; 418 } 419