1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Does various transformations for exception handling. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "WebAssembly.h" 17 #include "WebAssemblySubtarget.h" 18 #include "WebAssemblyUtilities.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/CodeGen/WasmEHFuncInfo.h" 21 #include "llvm/MC/MCAsmInfo.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "wasm-exception-prepare" 25 26 namespace { 27 class WebAssemblyLateEHPrepare final : public MachineFunctionPass { 28 StringRef getPassName() const override { 29 return "WebAssembly Prepare Exception"; 30 } 31 32 bool runOnMachineFunction(MachineFunction &MF) override; 33 34 bool replaceFuncletReturns(MachineFunction &MF); 35 bool hoistCatches(MachineFunction &MF); 36 bool addCatchAlls(MachineFunction &MF); 37 bool addRethrows(MachineFunction &MF); 38 bool ensureSingleBBTermPads(MachineFunction &MF); 39 bool mergeTerminatePads(MachineFunction &MF); 40 bool addCatchAllTerminatePads(MachineFunction &MF); 41 42 public: 43 static char ID; // Pass identification, replacement for typeid 44 WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {} 45 }; 46 } // end anonymous namespace 47 48 char WebAssemblyLateEHPrepare::ID = 0; 49 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, 50 "WebAssembly Late Exception Preparation", false, false) 51 52 FunctionPass *llvm::createWebAssemblyLateEHPrepare() { 53 return new WebAssemblyLateEHPrepare(); 54 } 55 56 // Returns the nearest EH pad that dominates this instruction. This does not use 57 // dominator analysis; it just does BFS on its predecessors until arriving at an 58 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all 59 // possible search paths should be the same. 60 // Returns nullptr in case it does not find any EH pad in the search, or finds 61 // multiple different EH pads. 62 static MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) { 63 MachineFunction *MF = MI->getParent()->getParent(); 64 SmallVector<MachineBasicBlock *, 2> WL; 65 SmallPtrSet<MachineBasicBlock *, 2> Visited; 66 WL.push_back(MI->getParent()); 67 MachineBasicBlock *EHPad = nullptr; 68 while (!WL.empty()) { 69 MachineBasicBlock *MBB = WL.pop_back_val(); 70 if (Visited.count(MBB)) 71 continue; 72 Visited.insert(MBB); 73 if (MBB->isEHPad()) { 74 if (EHPad && EHPad != MBB) 75 return nullptr; 76 EHPad = MBB; 77 continue; 78 } 79 if (MBB == &MF->front()) 80 return nullptr; 81 WL.append(MBB->pred_begin(), MBB->pred_end()); 82 } 83 return EHPad; 84 } 85 86 // Erases the given BBs and all their children from the function. If other BBs 87 // have the BB as a successor, the successor relationships will be deleted as 88 // well. 89 template <typename Container> 90 static void EraseBBsAndChildren(const Container &MBBs) { 91 SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end()); 92 while (!WL.empty()) { 93 MachineBasicBlock *MBB = WL.pop_back_val(); 94 SmallVector<MachineBasicBlock *, 4> Preds(MBB->pred_begin(), 95 MBB->pred_end()); 96 for (auto *Pred : Preds) 97 Pred->removeSuccessor(MBB); 98 SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(), 99 MBB->succ_end()); 100 WL.append(MBB->succ_begin(), MBB->succ_end()); 101 for (auto *Succ : Succs) 102 MBB->removeSuccessor(Succ); 103 MBB->eraseFromParent(); 104 } 105 } 106 107 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { 108 if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() != 109 ExceptionHandling::Wasm) 110 return false; 111 112 bool Changed = false; 113 Changed |= addRethrows(MF); 114 if (!MF.getFunction().hasPersonalityFn()) 115 return Changed; 116 Changed |= replaceFuncletReturns(MF); 117 Changed |= hoistCatches(MF); 118 Changed |= addCatchAlls(MF); 119 Changed |= ensureSingleBBTermPads(MF); 120 Changed |= mergeTerminatePads(MF); 121 Changed |= addCatchAllTerminatePads(MF); 122 return Changed; 123 } 124 125 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { 126 bool Changed = false; 127 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 128 auto *EHInfo = MF.getWasmEHFuncInfo(); 129 130 for (auto &MBB : MF) { 131 auto Pos = MBB.getFirstTerminator(); 132 if (Pos == MBB.end()) 133 continue; 134 MachineInstr *TI = &*Pos; 135 136 switch (TI->getOpcode()) { 137 case WebAssembly::CATCHRET: { 138 // Replace a catchret with a branch 139 MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); 140 if (!MBB.isLayoutSuccessor(TBB)) 141 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) 142 .addMBB(TBB); 143 TI->eraseFromParent(); 144 Changed = true; 145 break; 146 } 147 case WebAssembly::CLEANUPRET: { 148 // Replace a cleanupret with a rethrow 149 if (EHInfo->hasThrowUnwindDest(&MBB)) 150 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) 151 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 152 else 153 BuildMI(MBB, TI, TI->getDebugLoc(), 154 TII.get(WebAssembly::RETHROW_TO_CALLER)); 155 156 TI->eraseFromParent(); 157 Changed = true; 158 break; 159 } 160 } 161 } 162 return Changed; 163 } 164 165 // Hoist catch instructions to the beginning of their matching EH pad BBs in 166 // case, 167 // (1) catch instruction is not the first instruction in EH pad. 168 // ehpad: 169 // some_other_instruction 170 // ... 171 // %exn = catch 0 172 // (2) catch instruction is in a non-EH pad BB. For example, 173 // ehpad: 174 // br bb0 175 // bb0: 176 // %exn = catch 0 177 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { 178 bool Changed = false; 179 SmallVector<MachineInstr *, 16> Catches; 180 for (auto &MBB : MF) 181 for (auto &MI : MBB) 182 if (WebAssembly::isCatch(MI)) 183 Catches.push_back(&MI); 184 185 for (auto *Catch : Catches) { 186 MachineBasicBlock *EHPad = GetMatchingEHPad(Catch); 187 assert(EHPad && "No matching EH pad for catch"); 188 if (EHPad->begin() == Catch) 189 continue; 190 Changed = true; 191 EHPad->insert(EHPad->begin(), Catch->removeFromParent()); 192 } 193 return Changed; 194 } 195 196 // Add catch_all to beginning of cleanup pads. 197 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { 198 bool Changed = false; 199 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 200 201 for (auto &MBB : MF) { 202 if (!MBB.isEHPad()) 203 continue; 204 // This runs after hoistCatches(), so we assume that if there is a catch, 205 // that should be the first instruction in an EH pad. 206 if (!WebAssembly::isCatch(*MBB.begin())) { 207 Changed = true; 208 BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(), 209 TII.get(WebAssembly::CATCH_ALL)); 210 } 211 } 212 return Changed; 213 } 214 215 // Add a 'rethrow' instruction after __cxa_rethrow() call 216 bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) { 217 bool Changed = false; 218 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 219 auto *EHInfo = MF.getWasmEHFuncInfo(); 220 221 for (auto &MBB : MF) 222 for (auto &MI : MBB) { 223 // Check if it is a call to __cxa_rethrow() 224 if (!MI.isCall()) 225 continue; 226 MachineOperand &CalleeOp = MI.getOperand(0); 227 if (!CalleeOp.isGlobal() || 228 CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn) 229 continue; 230 231 // Now we have __cxa_rethrow() call 232 Changed = true; 233 auto InsertPt = std::next(MachineBasicBlock::iterator(MI)); 234 while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs 235 ++InsertPt; 236 MachineInstr *Rethrow = nullptr; 237 if (EHInfo->hasThrowUnwindDest(&MBB)) 238 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 239 TII.get(WebAssembly::RETHROW)) 240 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 241 else 242 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 243 TII.get(WebAssembly::RETHROW_TO_CALLER)); 244 245 // Becasue __cxa_rethrow does not return, the instruction after the 246 // rethrow should be an unreachable or a branch to another BB that should 247 // eventually lead to an unreachable. Delete it because rethrow itself is 248 // a terminator, and also delete non-EH pad successors if any. 249 MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end()); 250 SmallVector<MachineBasicBlock *, 8> NonPadSuccessors; 251 for (auto *Succ : MBB.successors()) 252 if (!Succ->isEHPad()) 253 NonPadSuccessors.push_back(Succ); 254 EraseBBsAndChildren(NonPadSuccessors); 255 } 256 return Changed; 257 } 258 259 // Terminate pads are an single-BB EH pad in the form of 260 // termpad: 261 // %exn = catch 0 262 // call @__clang_call_terminate(%exn) 263 // unreachable 264 // (There can be set_local and get_locals before the call if we didn't run 265 // RegStackify) 266 // But code transformations can change or add more control flow, so the call to 267 // __clang_call_terminate() function may not be in the original EH pad anymore. 268 // This ensures every terminate pad is a single BB in the form illustrated 269 // above. 270 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) { 271 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 272 273 // Find calls to __clang_call_terminate() 274 SmallVector<MachineInstr *, 8> ClangCallTerminateCalls; 275 for (auto &MBB : MF) 276 for (auto &MI : MBB) 277 if (MI.isCall()) { 278 const MachineOperand &CalleeOp = MI.getOperand(0); 279 if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == 280 WebAssembly::ClangCallTerminateFn) 281 ClangCallTerminateCalls.push_back(&MI); 282 } 283 284 bool Changed = false; 285 for (auto *Call : ClangCallTerminateCalls) { 286 MachineBasicBlock *EHPad = GetMatchingEHPad(Call); 287 assert(EHPad && "No matching EH pad for catch"); 288 289 // If it is already the form we want, skip it 290 if (Call->getParent() == EHPad && 291 Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE) 292 continue; 293 294 // In case the __clang_call_terminate() call is not in its matching EH pad, 295 // move the call to the end of EH pad and add an unreachable instruction 296 // after that. Delete all successors and their children if any, because here 297 // the program terminates. 298 Changed = true; 299 MachineInstr *Catch = &*EHPad->begin(); 300 // This runs after hoistCatches(), so catch instruction should be at the top 301 assert(WebAssembly::isCatch(*Catch)); 302 // Takes the result register of the catch instruction as argument. There may 303 // have been some other set_local/get_locals in between, but at this point 304 // we don't care. 305 Call->getOperand(1).setReg(Catch->getOperand(0).getReg()); 306 auto InsertPos = std::next(MachineBasicBlock::iterator(Catch)); 307 EHPad->insert(InsertPos, Call->removeFromParent()); 308 BuildMI(*EHPad, InsertPos, Call->getDebugLoc(), 309 TII.get(WebAssembly::UNREACHABLE)); 310 EHPad->erase(InsertPos, EHPad->end()); 311 EraseBBsAndChildren(EHPad->successors()); 312 } 313 return Changed; 314 } 315 316 // In case there are multiple terminate pads, merge them into one for code size. 317 // This runs after ensureSingleBBTermPads() and assumes every terminate pad is a 318 // single BB. 319 // In principle this violates EH scope relationship because it can merge 320 // multiple inner EH scopes, each of which is in different outer EH scope. But 321 // getEHScopeMembership() function will not be called after this, so it is fine. 322 bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) { 323 SmallVector<MachineBasicBlock *, 8> TermPads; 324 for (auto &MBB : MF) 325 if (WebAssembly::isCatchTerminatePad(MBB)) 326 TermPads.push_back(&MBB); 327 if (TermPads.empty()) 328 return false; 329 330 MachineBasicBlock *UniqueTermPad = TermPads.front(); 331 for (auto *TermPad : 332 llvm::make_range(std::next(TermPads.begin()), TermPads.end())) { 333 SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(), 334 TermPad->pred_end()); 335 for (auto *Pred : Preds) 336 Pred->replaceSuccessor(TermPad, UniqueTermPad); 337 TermPad->eraseFromParent(); 338 } 339 return true; 340 } 341 342 // Terminate pads are cleanup pads, so they should start with a 'catch_all' 343 // instruction. But in the Itanium model, when we have a C++ exception object, 344 // we pass them to __clang_call_terminate function, which calls __cxa_end_catch 345 // with the passed exception pointer and then std::terminate. This is the reason 346 // that terminate pads are generated with not a catch_all but a catch 347 // instruction in clang and earlier llvm passes. Here we append a terminate pad 348 // with a catch_all after each existing terminate pad so we can also catch 349 // foreign exceptions. For every terminate pad: 350 // %exn = catch 0 351 // call @__clang_call_terminate(%exn) 352 // unreachable 353 // We append this BB right after that: 354 // catch_all 355 // call @std::terminate() 356 // unreachable 357 bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) { 358 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 359 SmallVector<MachineBasicBlock *, 8> TermPads; 360 for (auto &MBB : MF) 361 if (WebAssembly::isCatchTerminatePad(MBB)) 362 TermPads.push_back(&MBB); 363 if (TermPads.empty()) 364 return false; 365 366 Function *StdTerminateFn = 367 MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn); 368 assert(StdTerminateFn && "There is no std::terminate() function"); 369 for (auto *CatchTermPad : TermPads) { 370 DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin()); 371 auto *CatchAllTermPad = MF.CreateMachineBasicBlock(); 372 MF.insert(std::next(MachineFunction::iterator(CatchTermPad)), 373 CatchAllTermPad); 374 CatchAllTermPad->setIsEHPad(); 375 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL)); 376 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID)) 377 .addGlobalAddress(StdTerminateFn); 378 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE)); 379 380 // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not 381 // a successor of an existing terminate pad. CatchAllTermPad should have all 382 // predecessors CatchTermPad has instead. This is a hack to force 383 // CatchAllTermPad be always sorted right after CatchTermPad; the correct 384 // predecessor-successor relationships will be restored in CFGStackify pass. 385 CatchTermPad->addSuccessor(CatchAllTermPad); 386 } 387 return true; 388 } 389