1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Does various transformations for exception handling.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssembly.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyUtilities.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/WasmEHFuncInfo.h"
21 #include "llvm/MC/MCAsmInfo.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "wasm-exception-prepare"
25 
26 namespace {
27 class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
28   StringRef getPassName() const override {
29     return "WebAssembly Prepare Exception";
30   }
31 
32   bool runOnMachineFunction(MachineFunction &MF) override;
33 
34   bool removeUnnecessaryUnreachables(MachineFunction &MF);
35   bool replaceFuncletReturns(MachineFunction &MF);
36   bool hoistCatches(MachineFunction &MF);
37   bool addCatchAlls(MachineFunction &MF);
38   bool addRethrows(MachineFunction &MF);
39   bool ensureSingleBBTermPads(MachineFunction &MF);
40   bool mergeTerminatePads(MachineFunction &MF);
41   bool addCatchAllTerminatePads(MachineFunction &MF);
42 
43 public:
44   static char ID; // Pass identification, replacement for typeid
45   WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
46 };
47 } // end anonymous namespace
48 
49 char WebAssemblyLateEHPrepare::ID = 0;
50 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
51                 "WebAssembly Late Exception Preparation", false, false)
52 
53 FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
54   return new WebAssemblyLateEHPrepare();
55 }
56 
57 // Returns the nearest EH pad that dominates this instruction. This does not use
58 // dominator analysis; it just does BFS on its predecessors until arriving at an
59 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
60 // possible search paths should be the same.
61 // Returns nullptr in case it does not find any EH pad in the search, or finds
62 // multiple different EH pads.
63 static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
64   MachineFunction *MF = MI->getParent()->getParent();
65   SmallVector<MachineBasicBlock *, 2> WL;
66   SmallPtrSet<MachineBasicBlock *, 2> Visited;
67   WL.push_back(MI->getParent());
68   MachineBasicBlock *EHPad = nullptr;
69   while (!WL.empty()) {
70     MachineBasicBlock *MBB = WL.pop_back_val();
71     if (Visited.count(MBB))
72       continue;
73     Visited.insert(MBB);
74     if (MBB->isEHPad()) {
75       if (EHPad && EHPad != MBB)
76         return nullptr;
77       EHPad = MBB;
78       continue;
79     }
80     if (MBB == &MF->front())
81       return nullptr;
82     WL.append(MBB->pred_begin(), MBB->pred_end());
83   }
84   return EHPad;
85 }
86 
87 // Erase the specified BBs if the BB does not have any remaining predecessors,
88 // and also all its dead children.
89 template <typename Container>
90 static void eraseDeadBBsAndChildren(const Container &MBBs) {
91   SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
92   while (!WL.empty()) {
93     MachineBasicBlock *MBB = WL.pop_back_val();
94     if (!MBB->pred_empty())
95       continue;
96     SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
97                                               MBB->succ_end());
98     WL.append(MBB->succ_begin(), MBB->succ_end());
99     for (auto *Succ : Succs)
100       MBB->removeSuccessor(Succ);
101     MBB->eraseFromParent();
102   }
103 }
104 
105 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
106   if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
107       ExceptionHandling::Wasm)
108     return false;
109 
110   bool Changed = false;
111   Changed |= removeUnnecessaryUnreachables(MF);
112   Changed |= addRethrows(MF);
113   if (!MF.getFunction().hasPersonalityFn())
114     return Changed;
115   Changed |= replaceFuncletReturns(MF);
116   Changed |= hoistCatches(MF);
117   Changed |= addCatchAlls(MF);
118   Changed |= ensureSingleBBTermPads(MF);
119   Changed |= mergeTerminatePads(MF);
120   Changed |= addCatchAllTerminatePads(MF);
121   return Changed;
122 }
123 
124 bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
125     MachineFunction &MF) {
126   bool Changed = false;
127   for (auto &MBB : MF) {
128     for (auto &MI : MBB) {
129       if (!WebAssembly::isThrow(MI))
130         continue;
131       Changed = true;
132 
133       // The instruction after the throw should be an unreachable or a branch to
134       // another BB that should eventually lead to an unreachable. Delete it
135       // because throw itself is a terminator, and also delete successors if
136       // any.
137       MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
138       SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
139                                                 MBB.succ_end());
140       for (auto *Succ : Succs)
141         MBB.removeSuccessor(Succ);
142       eraseDeadBBsAndChildren(Succs);
143     }
144   }
145 
146   return Changed;
147 }
148 
149 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
150   bool Changed = false;
151   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
152   auto *EHInfo = MF.getWasmEHFuncInfo();
153 
154   for (auto &MBB : MF) {
155     auto Pos = MBB.getFirstTerminator();
156     if (Pos == MBB.end())
157       continue;
158     MachineInstr *TI = &*Pos;
159 
160     switch (TI->getOpcode()) {
161     case WebAssembly::CATCHRET: {
162       // Replace a catchret with a branch
163       MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
164       if (!MBB.isLayoutSuccessor(TBB))
165         BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
166             .addMBB(TBB);
167       TI->eraseFromParent();
168       Changed = true;
169       break;
170     }
171     case WebAssembly::CLEANUPRET: {
172       // Replace a cleanupret with a rethrow
173       if (EHInfo->hasThrowUnwindDest(&MBB))
174         BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
175             .addMBB(EHInfo->getThrowUnwindDest(&MBB));
176       else
177         BuildMI(MBB, TI, TI->getDebugLoc(),
178                 TII.get(WebAssembly::RETHROW_TO_CALLER));
179 
180       TI->eraseFromParent();
181       Changed = true;
182       break;
183     }
184     }
185   }
186   return Changed;
187 }
188 
189 // Hoist catch instructions to the beginning of their matching EH pad BBs in
190 // case,
191 // (1) catch instruction is not the first instruction in EH pad.
192 // ehpad:
193 //   some_other_instruction
194 //   ...
195 //   %exn = catch 0
196 // (2) catch instruction is in a non-EH pad BB. For example,
197 // ehpad:
198 //   br bb0
199 // bb0:
200 //   %exn = catch 0
201 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
202   bool Changed = false;
203   SmallVector<MachineInstr *, 16> Catches;
204   for (auto &MBB : MF)
205     for (auto &MI : MBB)
206       if (WebAssembly::isCatch(MI))
207         Catches.push_back(&MI);
208 
209   for (auto *Catch : Catches) {
210     MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
211     assert(EHPad && "No matching EH pad for catch");
212     if (EHPad->begin() == Catch)
213       continue;
214     Changed = true;
215     EHPad->insert(EHPad->begin(), Catch->removeFromParent());
216   }
217   return Changed;
218 }
219 
220 // Add catch_all to beginning of cleanup pads.
221 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
222   bool Changed = false;
223   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
224 
225   for (auto &MBB : MF) {
226     if (!MBB.isEHPad())
227       continue;
228     // This runs after hoistCatches(), so we assume that if there is a catch,
229     // that should be the first instruction in an EH pad.
230     if (!WebAssembly::isCatch(*MBB.begin())) {
231       Changed = true;
232       BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
233               TII.get(WebAssembly::CATCH_ALL));
234     }
235   }
236   return Changed;
237 }
238 
239 // Add a 'rethrow' instruction after __cxa_rethrow() call
240 bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
241   bool Changed = false;
242   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
243   auto *EHInfo = MF.getWasmEHFuncInfo();
244 
245   for (auto &MBB : MF)
246     for (auto &MI : MBB) {
247       // Check if it is a call to __cxa_rethrow()
248       if (!MI.isCall())
249         continue;
250       MachineOperand &CalleeOp = MI.getOperand(0);
251       if (!CalleeOp.isGlobal() ||
252           CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
253         continue;
254 
255       // Now we have __cxa_rethrow() call
256       Changed = true;
257       auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
258       while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
259         ++InsertPt;
260       MachineInstr *Rethrow = nullptr;
261       if (EHInfo->hasThrowUnwindDest(&MBB))
262         Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
263                           TII.get(WebAssembly::RETHROW))
264                       .addMBB(EHInfo->getThrowUnwindDest(&MBB));
265       else
266         Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
267                           TII.get(WebAssembly::RETHROW_TO_CALLER));
268 
269       // Because __cxa_rethrow does not return, the instruction after the
270       // rethrow should be an unreachable or a branch to another BB that should
271       // eventually lead to an unreachable. Delete it because rethrow itself is
272       // a terminator, and also delete non-EH pad successors if any.
273       MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
274       SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
275       for (auto *Succ : MBB.successors())
276         if (!Succ->isEHPad())
277           NonPadSuccessors.push_back(Succ);
278       for (auto *Succ : NonPadSuccessors)
279         MBB.removeSuccessor(Succ);
280       eraseDeadBBsAndChildren(NonPadSuccessors);
281     }
282   return Changed;
283 }
284 
285 // Terminate pads are an single-BB EH pad in the form of
286 // termpad:
287 //   %exn = catch 0
288 //   call @__clang_call_terminate(%exn)
289 //   unreachable
290 // (There can be set_local and get_locals before the call if we didn't run
291 // RegStackify)
292 // But code transformations can change or add more control flow, so the call to
293 // __clang_call_terminate() function may not be in the original EH pad anymore.
294 // This ensures every terminate pad is a single BB in the form illustrated
295 // above.
296 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
297   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
298 
299   // Find calls to __clang_call_terminate()
300   SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
301   for (auto &MBB : MF)
302     for (auto &MI : MBB)
303       if (MI.isCall()) {
304         const MachineOperand &CalleeOp = MI.getOperand(0);
305         if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
306                                        WebAssembly::ClangCallTerminateFn)
307           ClangCallTerminateCalls.push_back(&MI);
308       }
309 
310   bool Changed = false;
311   for (auto *Call : ClangCallTerminateCalls) {
312     MachineBasicBlock *EHPad = getMatchingEHPad(Call);
313     assert(EHPad && "No matching EH pad for catch");
314 
315     // If it is already the form we want, skip it
316     if (Call->getParent() == EHPad &&
317         Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
318       continue;
319 
320     // In case the __clang_call_terminate() call is not in its matching EH pad,
321     // move the call to the end of EH pad and add an unreachable instruction
322     // after that. Delete all successors and their children if any, because here
323     // the program terminates.
324     Changed = true;
325     MachineInstr *Catch = &*EHPad->begin();
326     // This runs after hoistCatches(), so catch instruction should be at the top
327     assert(WebAssembly::isCatch(*Catch));
328     // Takes the result register of the catch instruction as argument. There may
329     // have been some other set_local/get_locals in between, but at this point
330     // we don't care.
331     Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
332     auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
333     EHPad->insert(InsertPos, Call->removeFromParent());
334     BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
335             TII.get(WebAssembly::UNREACHABLE));
336     EHPad->erase(InsertPos, EHPad->end());
337     SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
338                                               EHPad->succ_end());
339     for (auto *Succ : Succs)
340       EHPad->removeSuccessor(Succ);
341     eraseDeadBBsAndChildren(Succs);
342   }
343   return Changed;
344 }
345 
346 // In case there are multiple terminate pads, merge them into one for code size.
347 // This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
348 // single BB.
349 // In principle this violates EH scope relationship because it can merge
350 // multiple inner EH scopes, each of which is in different outer EH scope. But
351 // getEHScopeMembership() function will not be called after this, so it is fine.
352 bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
353   SmallVector<MachineBasicBlock *, 8> TermPads;
354   for (auto &MBB : MF)
355     if (WebAssembly::isCatchTerminatePad(MBB))
356       TermPads.push_back(&MBB);
357   if (TermPads.empty())
358     return false;
359 
360   MachineBasicBlock *UniqueTermPad = TermPads.front();
361   for (auto *TermPad :
362        llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
363     SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
364                                               TermPad->pred_end());
365     for (auto *Pred : Preds)
366       Pred->replaceSuccessor(TermPad, UniqueTermPad);
367     TermPad->eraseFromParent();
368   }
369   return true;
370 }
371 
372 // Terminate pads are cleanup pads, so they should start with a 'catch_all'
373 // instruction. But in the Itanium model, when we have a C++ exception object,
374 // we pass them to __clang_call_terminate function, which calls __cxa_end_catch
375 // with the passed exception pointer and then std::terminate. This is the reason
376 // that terminate pads are generated with not a catch_all but a catch
377 // instruction in clang and earlier llvm passes. Here we append a terminate pad
378 // with a catch_all after each existing terminate pad so we can also catch
379 // foreign exceptions. For every terminate pad:
380 //   %exn = catch 0
381 //   call @__clang_call_terminate(%exn)
382 //   unreachable
383 // We append this BB right after that:
384 //   catch_all
385 //   call @std::terminate()
386 //   unreachable
387 bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
388   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
389   SmallVector<MachineBasicBlock *, 8> TermPads;
390   for (auto &MBB : MF)
391     if (WebAssembly::isCatchTerminatePad(MBB))
392       TermPads.push_back(&MBB);
393   if (TermPads.empty())
394     return false;
395 
396   Function *StdTerminateFn =
397       MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
398   assert(StdTerminateFn && "There is no std::terminate() function");
399   for (auto *CatchTermPad : TermPads) {
400     DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
401     auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
402     MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
403               CatchAllTermPad);
404     CatchAllTermPad->setIsEHPad();
405     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
406     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
407         .addGlobalAddress(StdTerminateFn);
408     BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
409 
410     // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
411     // a successor of an existing terminate pad. CatchAllTermPad should have all
412     // predecessors CatchTermPad has instead. This is a hack to force
413     // CatchAllTermPad be always sorted right after CatchTermPad; the correct
414     // predecessor-successor relationships will be restored in CFGStackify pass.
415     CatchTermPad->addSuccessor(CatchAllTermPad);
416   }
417   return true;
418 }
419