1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// \brief Does various transformations for exception handling.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
15 #include "WebAssembly.h"
16 #include "WebAssemblySubtarget.h"
17 #include "WebAssemblyUtilities.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/WasmEHFuncInfo.h"
21 #include "llvm/MC/MCAsmInfo.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetMachine.h"
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "wasm-late-eh-prepare"
27 
28 namespace {
29 class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
30   StringRef getPassName() const override {
31     return "WebAssembly Late Prepare Exception";
32   }
33 
34   bool runOnMachineFunction(MachineFunction &MF) override;
35   bool removeUnreachableEHPads(MachineFunction &MF);
36   void recordCatchRetBBs(MachineFunction &MF);
37   bool hoistCatches(MachineFunction &MF);
38   bool addCatchAlls(MachineFunction &MF);
39   bool replaceFuncletReturns(MachineFunction &MF);
40   bool removeUnnecessaryUnreachables(MachineFunction &MF);
41   bool ensureSingleBBTermPads(MachineFunction &MF);
42   bool restoreStackPointer(MachineFunction &MF);
43 
44   MachineBasicBlock *getMatchingEHPad(MachineInstr *MI);
45   SmallPtrSet<MachineBasicBlock *, 8> CatchRetBBs;
46 
47 public:
48   static char ID; // Pass identification, replacement for typeid
49   WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
50 };
51 } // end anonymous namespace
52 
53 char WebAssemblyLateEHPrepare::ID = 0;
54 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
55                 "WebAssembly Late Exception Preparation", false, false)
56 
57 FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
58   return new WebAssemblyLateEHPrepare();
59 }
60 
61 // Returns the nearest EH pad that dominates this instruction. This does not use
62 // dominator analysis; it just does BFS on its predecessors until arriving at an
63 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
64 // possible search paths should be the same.
65 // Returns nullptr in case it does not find any EH pad in the search, or finds
66 // multiple different EH pads.
67 MachineBasicBlock *
68 WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) {
69   MachineFunction *MF = MI->getParent()->getParent();
70   SmallVector<MachineBasicBlock *, 2> WL;
71   SmallPtrSet<MachineBasicBlock *, 2> Visited;
72   WL.push_back(MI->getParent());
73   MachineBasicBlock *EHPad = nullptr;
74   while (!WL.empty()) {
75     MachineBasicBlock *MBB = WL.pop_back_val();
76     if (Visited.count(MBB))
77       continue;
78     Visited.insert(MBB);
79     if (MBB->isEHPad()) {
80       if (EHPad && EHPad != MBB)
81         return nullptr;
82       EHPad = MBB;
83       continue;
84     }
85     if (MBB == &MF->front())
86       return nullptr;
87     for (auto *Pred : MBB->predecessors())
88       if (!CatchRetBBs.count(Pred)) // We don't go into child scopes
89         WL.push_back(Pred);
90   }
91   return EHPad;
92 }
93 
94 // Erase the specified BBs if the BB does not have any remaining predecessors,
95 // and also all its dead children.
96 template <typename Container>
97 static void eraseDeadBBsAndChildren(const Container &MBBs) {
98   SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
99   SmallPtrSet<MachineBasicBlock *, 8> Deleted;
100   while (!WL.empty()) {
101     MachineBasicBlock *MBB = WL.pop_back_val();
102     if (Deleted.count(MBB) || !MBB->pred_empty())
103       continue;
104     SmallVector<MachineBasicBlock *, 4> Succs(MBB->successors());
105     WL.append(MBB->succ_begin(), MBB->succ_end());
106     for (auto *Succ : Succs)
107       MBB->removeSuccessor(Succ);
108     // To prevent deleting the same BB multiple times, which can happen when
109     // 'MBBs' contain both a parent and a child
110     Deleted.insert(MBB);
111     MBB->eraseFromParent();
112   }
113 }
114 
115 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
116   LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
117                        "********** Function: "
118                     << MF.getName() << '\n');
119 
120   if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
121       ExceptionHandling::Wasm)
122     return false;
123 
124   bool Changed = false;
125   if (MF.getFunction().hasPersonalityFn()) {
126     Changed |= removeUnreachableEHPads(MF);
127     recordCatchRetBBs(MF);
128     Changed |= hoistCatches(MF);
129     Changed |= addCatchAlls(MF);
130     Changed |= replaceFuncletReturns(MF);
131     Changed |= ensureSingleBBTermPads(MF);
132   }
133   Changed |= removeUnnecessaryUnreachables(MF);
134   if (MF.getFunction().hasPersonalityFn())
135     Changed |= restoreStackPointer(MF);
136   return Changed;
137 }
138 
139 // Remove unreachable EH pads and its children. If they remain, CFG
140 // stackification can be tricky.
141 bool WebAssemblyLateEHPrepare::removeUnreachableEHPads(MachineFunction &MF) {
142   SmallVector<MachineBasicBlock *, 4> ToDelete;
143   for (auto &MBB : MF)
144     if (MBB.isEHPad() && MBB.pred_empty())
145       ToDelete.push_back(&MBB);
146   eraseDeadBBsAndChildren(ToDelete);
147   return !ToDelete.empty();
148 }
149 
150 // Record which BB ends with catchret instruction, because this will be replaced
151 // with 'br's later. This set of catchret BBs is necessary in 'getMatchingEHPad'
152 // function.
153 void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) {
154   CatchRetBBs.clear();
155   for (auto &MBB : MF) {
156     auto Pos = MBB.getFirstTerminator();
157     if (Pos == MBB.end())
158       continue;
159     MachineInstr *TI = &*Pos;
160     if (TI->getOpcode() == WebAssembly::CATCHRET)
161       CatchRetBBs.insert(&MBB);
162   }
163 }
164 
165 // Hoist catch instructions to the beginning of their matching EH pad BBs in
166 // case,
167 // (1) catch instruction is not the first instruction in EH pad.
168 // ehpad:
169 //   some_other_instruction
170 //   ...
171 //   %exn = catch 0
172 // (2) catch instruction is in a non-EH pad BB. For example,
173 // ehpad:
174 //   br bb0
175 // bb0:
176 //   %exn = catch 0
177 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
178   bool Changed = false;
179   SmallVector<MachineInstr *, 16> Catches;
180   for (auto &MBB : MF)
181     for (auto &MI : MBB)
182       if (WebAssembly::isCatch(MI.getOpcode()))
183         Catches.push_back(&MI);
184 
185   for (auto *Catch : Catches) {
186     MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
187     assert(EHPad && "No matching EH pad for catch");
188     auto InsertPos = EHPad->begin();
189     // Skip EH_LABELs in the beginning of an EH pad if present. We don't use
190     // these labels at the moment, but other targets also seem to have an
191     // EH_LABEL instruction in the beginning of an EH pad.
192     while (InsertPos != EHPad->end() && InsertPos->isEHLabel())
193       InsertPos++;
194     if (InsertPos == Catch)
195       continue;
196     Changed = true;
197     EHPad->insert(InsertPos, Catch->removeFromParent());
198   }
199   return Changed;
200 }
201 
202 // Add catch_all to beginning of cleanup pads.
203 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
204   bool Changed = false;
205   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
206 
207   for (auto &MBB : MF) {
208     if (!MBB.isEHPad())
209       continue;
210     auto InsertPos = MBB.begin();
211     // Skip EH_LABELs in the beginning of an EH pad if present.
212     while (InsertPos != MBB.end() && InsertPos->isEHLabel())
213       InsertPos++;
214     // This runs after hoistCatches(), so we assume that if there is a catch,
215     // that should be the first non-EH-label instruction in an EH pad.
216     if (InsertPos == MBB.end() ||
217         !WebAssembly::isCatch(InsertPos->getOpcode())) {
218       Changed = true;
219       BuildMI(MBB, InsertPos,
220               InsertPos == MBB.end() ? DebugLoc() : InsertPos->getDebugLoc(),
221               TII.get(WebAssembly::CATCH_ALL));
222     }
223   }
224   return Changed;
225 }
226 
227 // Replace pseudo-instructions catchret and cleanupret with br and rethrow
228 // respectively.
229 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
230   bool Changed = false;
231   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
232 
233   for (auto &MBB : MF) {
234     auto Pos = MBB.getFirstTerminator();
235     if (Pos == MBB.end())
236       continue;
237     MachineInstr *TI = &*Pos;
238 
239     switch (TI->getOpcode()) {
240     case WebAssembly::CATCHRET: {
241       // Replace a catchret with a branch
242       MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
243       if (!MBB.isLayoutSuccessor(TBB))
244         BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
245             .addMBB(TBB);
246       TI->eraseFromParent();
247       Changed = true;
248       break;
249     }
250     case WebAssembly::CLEANUPRET: {
251       // Replace a cleanupret with a rethrow. For C++ support, currently
252       // rethrow's immediate argument is always 0 (= the latest exception).
253       BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
254           .addImm(0);
255       TI->eraseFromParent();
256       Changed = true;
257       break;
258     }
259     }
260   }
261   return Changed;
262 }
263 
264 // Remove unnecessary unreachables after a throw or rethrow.
265 bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
266     MachineFunction &MF) {
267   bool Changed = false;
268   for (auto &MBB : MF) {
269     for (auto &MI : MBB) {
270       if (MI.getOpcode() != WebAssembly::THROW &&
271           MI.getOpcode() != WebAssembly::RETHROW)
272         continue;
273       Changed = true;
274 
275       // The instruction after the throw should be an unreachable or a branch to
276       // another BB that should eventually lead to an unreachable. Delete it
277       // because throw itself is a terminator, and also delete successors if
278       // any.
279       MBB.erase(std::next(MI.getIterator()), MBB.end());
280       SmallVector<MachineBasicBlock *, 8> Succs(MBB.successors());
281       for (auto *Succ : Succs)
282         if (!Succ->isEHPad())
283           MBB.removeSuccessor(Succ);
284       eraseDeadBBsAndChildren(Succs);
285     }
286   }
287 
288   return Changed;
289 }
290 
291 // Clang-generated terminate pads are an single-BB EH pad in the form of
292 // termpad:
293 //   %exn = catch $__cpp_exception
294 //   call @__clang_call_terminate(%exn)
295 //   unreachable
296 // (There can be local.set and local.gets before the call if we didn't run
297 // RegStackify)
298 // But code transformations can change or add more control flow, so the call to
299 // __clang_call_terminate() function may not be in the original EH pad anymore.
300 // This ensures every terminate pad is a single BB in the form illustrated
301 // above.
302 //
303 // This is preparation work for the HandleEHTerminatePads pass later, which
304 // duplicates terminate pads both for 'catch' and 'catch_all'. Refer to
305 // WebAssemblyHandleEHTerminatePads.cpp for details.
306 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
307   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
308 
309   // Find calls to __clang_call_terminate()
310   SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
311   SmallPtrSet<MachineBasicBlock *, 8> TermPads;
312   for (auto &MBB : MF) {
313     for (auto &MI : MBB) {
314       if (MI.isCall()) {
315         const MachineOperand &CalleeOp = MI.getOperand(0);
316         if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
317                                        WebAssembly::ClangCallTerminateFn) {
318           MachineBasicBlock *EHPad = getMatchingEHPad(&MI);
319           assert(EHPad && "No matching EH pad for __clang_call_terminate");
320           // In case a __clang_call_terminate call is duplicated during code
321           // transformation so one terminate pad contains multiple
322           // __clang_call_terminate calls, we only count one of them
323           if (TermPads.insert(EHPad).second)
324             ClangCallTerminateCalls.push_back(&MI);
325         }
326       }
327     }
328   }
329 
330   bool Changed = false;
331   for (auto *Call : ClangCallTerminateCalls) {
332     MachineBasicBlock *EHPad = getMatchingEHPad(Call);
333     assert(EHPad && "No matching EH pad for __clang_call_terminate");
334 
335     // If it is already the form we want, skip it
336     if (Call->getParent() == EHPad &&
337         Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
338       continue;
339 
340     // In case the __clang_call_terminate() call is not in its matching EH pad,
341     // move the call to the end of EH pad and add an unreachable instruction
342     // after that. Delete all successors and their children if any, because here
343     // the program terminates.
344     Changed = true;
345     // This runs after hoistCatches(), so catch instruction should be at the top
346     MachineInstr *Catch = WebAssembly::findCatch(EHPad);
347     assert(Catch && "EH pad does not have a catch instruction");
348     // Takes the result register of the catch instruction as argument. There may
349     // have been some other local.set/local.gets in between, but at this point
350     // we don't care.
351     Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
352     auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
353     EHPad->insert(InsertPos, Call->removeFromParent());
354     BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
355             TII.get(WebAssembly::UNREACHABLE));
356     EHPad->erase(InsertPos, EHPad->end());
357     SmallVector<MachineBasicBlock *, 8> Succs(EHPad->successors());
358     for (auto *Succ : Succs)
359       EHPad->removeSuccessor(Succ);
360     eraseDeadBBsAndChildren(Succs);
361   }
362   return Changed;
363 }
364 
365 // After the stack is unwound due to a thrown exception, the __stack_pointer
366 // global can point to an invalid address. This inserts instructions that
367 // restore __stack_pointer global.
368 bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
369   const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
370       MF.getSubtarget().getFrameLowering());
371   if (!FrameLowering->needsPrologForEH(MF))
372     return false;
373   bool Changed = false;
374 
375   for (auto &MBB : MF) {
376     if (!MBB.isEHPad())
377       continue;
378     Changed = true;
379 
380     // Insert __stack_pointer restoring instructions at the beginning of each EH
381     // pad, after the catch instruction. Here it is safe to assume that SP32
382     // holds the latest value of __stack_pointer, because the only exception for
383     // this case is when a function uses the red zone, but that only happens
384     // with leaf functions, and we don't restore __stack_pointer in leaf
385     // functions anyway.
386     auto InsertPos = MBB.begin();
387     // Skip EH_LABELs in the beginning of an EH pad if present.
388     while (InsertPos != MBB.end() && InsertPos->isEHLabel())
389       InsertPos++;
390     assert(InsertPos != MBB.end() &&
391            WebAssembly::isCatch(InsertPos->getOpcode()) &&
392            "catch/catch_all should be present in every EH pad at this point");
393     ++InsertPos; // Skip the catch instruction
394     FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
395                                    InsertPos, MBB.begin()->getDebugLoc());
396   }
397   return Changed;
398 }
399