1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
13 ///
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
17 ///
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
29 ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
30 ///     NumLoops * NumLoops)
31 ///
32 /// This pass is similar to what the Relooper [1] does. Both identify looping
33 /// code that requires multiple entries, and resolve it in a similar way (in
34 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
35 /// also that like the Relooper, we implement a "minimal" intervention: we only
36 /// use the "label" helper for the blocks we absolutely must and no others. We
37 /// also prioritize code size and do not duplicate code in order to resolve
38 /// irreducibility. The graph algorithms for finding loops and entries and so
39 /// forth are also similar to the Relooper. The main differences between this
40 /// pass and the Relooper are:
41 ///  * We just care about irreducibility, so we just look at loops.
42 ///  * The Relooper emits structured control flow (with ifs etc.), while we
43 ///    emit a CFG.
44 ///
45 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
46 /// Proceedings of the ACM international conference companion on Object oriented
47 /// programming systems languages and applications companion (SPLASH '11). ACM,
48 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
49 /// http://doi.acm.org/10.1145/2048147.2048224
50 ///
51 //===----------------------------------------------------------------------===//
52 
53 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
54 #include "WebAssembly.h"
55 #include "WebAssemblySubtarget.h"
56 #include "llvm/CodeGen/MachineInstrBuilder.h"
57 using namespace llvm;
58 
59 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
60 
61 namespace {
62 
63 using BlockVector = SmallVector<MachineBasicBlock *, 4>;
64 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
65 
66 // Calculates reachability in a region. Ignores branches to blocks outside of
67 // the region, and ignores branches to the region entry (for the case where
68 // the region is the inner part of a loop).
69 class ReachabilityGraph {
70 public:
71   ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
72       : Entry(Entry), Blocks(Blocks) {
73 #ifndef NDEBUG
74     // The region must have a single entry.
75     for (auto *MBB : Blocks) {
76       if (MBB != Entry) {
77         for (auto *Pred : MBB->predecessors()) {
78           assert(inRegion(Pred));
79         }
80       }
81     }
82 #endif
83     calculate();
84   }
85 
86   bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
87     assert(inRegion(From) && inRegion(To));
88     auto I = Reachable.find(From);
89     if (I == Reachable.end())
90       return false;
91     return I->second.count(To);
92   }
93 
94   // "Loopers" are blocks that are in a loop. We detect these by finding blocks
95   // that can reach themselves.
96   const BlockSet &getLoopers() const { return Loopers; }
97 
98   // Get all blocks that are loop entries.
99   const BlockSet &getLoopEntries() const { return LoopEntries; }
100 
101   // Get all blocks that enter a particular loop from outside.
102   const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
103     assert(inRegion(LoopEntry));
104     auto I = LoopEnterers.find(LoopEntry);
105     assert(I != LoopEnterers.end());
106     return I->second;
107   }
108 
109 private:
110   MachineBasicBlock *Entry;
111   const BlockSet &Blocks;
112 
113   BlockSet Loopers, LoopEntries;
114   DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
115 
116   bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
117 
118   // Maps a block to all the other blocks it can reach.
119   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
120 
121   void calculate() {
122     // Reachability computation work list. Contains pairs of recent additions
123     // (A, B) where we just added a link A => B.
124     using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
125     SmallVector<BlockPair, 4> WorkList;
126 
127     // Add all relevant direct branches.
128     for (auto *MBB : Blocks) {
129       for (auto *Succ : MBB->successors()) {
130         if (Succ != Entry && inRegion(Succ)) {
131           Reachable[MBB].insert(Succ);
132           WorkList.emplace_back(MBB, Succ);
133         }
134       }
135     }
136 
137     while (!WorkList.empty()) {
138       MachineBasicBlock *MBB, *Succ;
139       std::tie(MBB, Succ) = WorkList.pop_back_val();
140       assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
141       if (MBB != Entry) {
142         // We recently added MBB => Succ, and that means we may have enabled
143         // Pred => MBB => Succ.
144         for (auto *Pred : MBB->predecessors()) {
145           if (Reachable[Pred].insert(Succ).second) {
146             WorkList.emplace_back(Pred, Succ);
147           }
148         }
149       }
150     }
151 
152     // Blocks that can return to themselves are in a loop.
153     for (auto *MBB : Blocks) {
154       if (canReach(MBB, MBB)) {
155         Loopers.insert(MBB);
156       }
157     }
158     assert(!Loopers.count(Entry));
159 
160     // Find the loop entries - loopers reachable from blocks not in that loop -
161     // and those outside blocks that reach them, the "loop enterers".
162     for (auto *Looper : Loopers) {
163       for (auto *Pred : Looper->predecessors()) {
164         // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
165         // otherwise, it is a block that enters into the loop.
166         if (!canReach(Looper, Pred)) {
167           LoopEntries.insert(Looper);
168           LoopEnterers[Looper].insert(Pred);
169         }
170       }
171     }
172   }
173 };
174 
175 // Finds the blocks in a single-entry loop, given the loop entry and the
176 // list of blocks that enter the loop.
177 class LoopBlocks {
178 public:
179   LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
180       : Entry(Entry), Enterers(Enterers) {
181     calculate();
182   }
183 
184   BlockSet &getBlocks() { return Blocks; }
185 
186 private:
187   MachineBasicBlock *Entry;
188   const BlockSet &Enterers;
189 
190   BlockSet Blocks;
191 
192   void calculate() {
193     // Going backwards from the loop entry, if we ignore the blocks entering
194     // from outside, we will traverse all the blocks in the loop.
195     BlockVector WorkList;
196     BlockSet AddedToWorkList;
197     Blocks.insert(Entry);
198     for (auto *Pred : Entry->predecessors()) {
199       if (!Enterers.count(Pred)) {
200         WorkList.push_back(Pred);
201         AddedToWorkList.insert(Pred);
202       }
203     }
204 
205     while (!WorkList.empty()) {
206       auto *MBB = WorkList.pop_back_val();
207       assert(!Enterers.count(MBB));
208       if (Blocks.insert(MBB).second) {
209         for (auto *Pred : MBB->predecessors()) {
210           if (!AddedToWorkList.count(Pred)) {
211             WorkList.push_back(Pred);
212             AddedToWorkList.insert(Pred);
213           }
214         }
215       }
216     }
217   }
218 };
219 
220 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
221   StringRef getPassName() const override {
222     return "WebAssembly Fix Irreducible Control Flow";
223   }
224 
225   bool runOnMachineFunction(MachineFunction &MF) override;
226 
227   bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
228                      MachineFunction &MF);
229 
230   void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
231                            MachineFunction &MF);
232 
233 public:
234   static char ID; // Pass identification, replacement for typeid
235   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
236 };
237 
238 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
239     MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
240   bool Changed = false;
241 
242   // Remove irreducibility before processing child loops, which may take
243   // multiple iterations.
244   while (true) {
245     ReachabilityGraph Graph(Entry, Blocks);
246 
247     bool FoundIrreducibility = false;
248 
249     for (auto *LoopEntry : Graph.getLoopEntries()) {
250       // Find mutual entries - all entries which can reach this one, and
251       // are reached by it (that always includes LoopEntry itself). All mutual
252       // entries must be in the same loop, so if we have more than one, then we
253       // have irreducible control flow.
254       //
255       // Note that irreducibility may involve inner loops, e.g. imagine A
256       // starts one loop, and it has B inside it which starts an inner loop.
257       // If we add a branch from all the way on the outside to B, then in a
258       // sense B is no longer an "inner" loop, semantically speaking. We will
259       // fix that irreducibility by adding a block that dispatches to either
260       // either A or B, so B will no longer be an inner loop in our output.
261       // (A fancier approach might try to keep it as such.)
262       //
263       // Note that we still need to recurse into inner loops later, to handle
264       // the case where the irreducibility is entirely nested - we would not
265       // be able to identify that at this point, since the enclosing loop is
266       // a group of blocks all of whom can reach each other. (We'll see the
267       // irreducibility after removing branches to the top of that enclosing
268       // loop.)
269       BlockSet MutualLoopEntries;
270       MutualLoopEntries.insert(LoopEntry);
271       for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
272         if (OtherLoopEntry != LoopEntry &&
273             Graph.canReach(LoopEntry, OtherLoopEntry) &&
274             Graph.canReach(OtherLoopEntry, LoopEntry)) {
275           MutualLoopEntries.insert(OtherLoopEntry);
276         }
277       }
278 
279       if (MutualLoopEntries.size() > 1) {
280         makeSingleEntryLoop(MutualLoopEntries, Blocks, MF);
281         FoundIrreducibility = true;
282         Changed = true;
283         break;
284       }
285     }
286     // Only go on to actually process the inner loops when we are done
287     // removing irreducible control flow and changing the graph. Modifying
288     // the graph as we go is possible, and that might let us avoid looking at
289     // the already-fixed loops again if we are careful, but all that is
290     // complex and bug-prone. Since irreducible loops are rare, just starting
291     // another iteration is best.
292     if (FoundIrreducibility) {
293       continue;
294     }
295 
296     for (auto *LoopEntry : Graph.getLoopEntries()) {
297       LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
298       // Each of these calls to processRegion may change the graph, but are
299       // guaranteed not to interfere with each other. The only changes we make
300       // to the graph are to add blocks on the way to a loop entry. As the
301       // loops are disjoint, that means we may only alter branches that exit
302       // another loop, which are ignored when recursing into that other loop
303       // anyhow.
304       if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
305         Changed = true;
306       }
307     }
308 
309     return Changed;
310   }
311 }
312 
313 // Given a set of entries to a single loop, create a single entry for that
314 // loop by creating a dispatch block for them, routing control flow using
315 // a helper variable. Also updates Blocks with any new blocks created, so
316 // that we properly track all the blocks in the region.
317 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
318     BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) {
319   assert(Entries.size() >= 2);
320 
321   // Sort the entries to ensure a deterministic build.
322   BlockVector SortedEntries(Entries.begin(), Entries.end());
323   llvm::sort(SortedEntries,
324              [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
325                auto ANum = A->getNumber();
326                auto BNum = B->getNumber();
327                return ANum < BNum;
328              });
329 
330 #ifndef NDEBUG
331   for (auto Block : SortedEntries)
332     assert(Block->getNumber() != -1);
333   if (SortedEntries.size() > 1) {
334     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
335          ++I) {
336       auto ANum = (*I)->getNumber();
337       auto BNum = (*(std::next(I)))->getNumber();
338       assert(ANum != BNum);
339     }
340   }
341 #endif
342 
343   // Create a dispatch block which will contain a jump table to the entries.
344   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
345   MF.insert(MF.end(), Dispatch);
346   Blocks.insert(Dispatch);
347 
348   // Add the jump table.
349   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
350   MachineInstrBuilder MIB =
351       BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
352 
353   // Add the register which will be used to tell the jump table which block to
354   // jump to.
355   MachineRegisterInfo &MRI = MF.getRegInfo();
356   unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
357   MIB.addReg(Reg);
358 
359   // Compute the indices in the superheader, one for each bad block, and
360   // add them as successors.
361   DenseMap<MachineBasicBlock *, unsigned> Indices;
362   for (auto *Entry : SortedEntries) {
363     auto Pair = Indices.insert(std::make_pair(Entry, 0));
364     assert(Pair.second);
365 
366     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
367     Pair.first->second = Index;
368 
369     MIB.addMBB(Entry);
370     Dispatch->addSuccessor(Entry);
371   }
372 
373   // Rewrite the problematic successors for every block that wants to reach
374   // the bad blocks. For simplicity, we just introduce a new block for every
375   // edge we need to rewrite. (Fancier things are possible.)
376 
377   BlockVector AllPreds;
378   for (auto *Entry : SortedEntries) {
379     for (auto *Pred : Entry->predecessors()) {
380       if (Pred != Dispatch) {
381         AllPreds.push_back(Pred);
382       }
383     }
384   }
385 
386   for (MachineBasicBlock *Pred : AllPreds) {
387     DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
388     for (auto *Entry : Pred->successors()) {
389       if (!Entries.count(Entry)) {
390         continue;
391       }
392 
393       // This is a successor we need to rewrite.
394       MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
395       MF.insert(Pred->isLayoutSuccessor(Entry)
396                     ? MachineFunction::iterator(Entry)
397                     : MF.end(),
398                 Split);
399       Blocks.insert(Split);
400 
401       // Set the jump table's register of the index of the block we wish to
402       // jump to, and jump to the jump table.
403       BuildMI(Split, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
404           .addImm(Indices[Entry]);
405       BuildMI(Split, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
406       Split->addSuccessor(Dispatch);
407       Map[Entry] = Split;
408     }
409     // Remap the terminator operands and the successor list.
410     for (MachineInstr &Term : Pred->terminators())
411       for (auto &Op : Term.explicit_uses())
412         if (Op.isMBB() && Indices.count(Op.getMBB()))
413           Op.setMBB(Map[Op.getMBB()]);
414     for (auto Rewrite : Map)
415       Pred->replaceSuccessor(Rewrite.first, Rewrite.second);
416   }
417 
418   // Create a fake default label, because br_table requires one.
419   MIB.addMBB(MIB.getInstr()
420                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
421                  .getMBB());
422 }
423 
424 } // end anonymous namespace
425 
426 char WebAssemblyFixIrreducibleControlFlow::ID = 0;
427 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
428                 "Removes irreducible control flow", false, false)
429 
430 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
431   return new WebAssemblyFixIrreducibleControlFlow();
432 }
433 
434 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
435     MachineFunction &MF) {
436   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
437                        "********** Function: "
438                     << MF.getName() << '\n');
439 
440   // Start the recursive process on the entire function body.
441   BlockSet AllBlocks;
442   for (auto &MBB : MF) {
443     AllBlocks.insert(&MBB);
444   }
445 
446   if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
447     // We rewrote part of the function; recompute relevant things.
448     MF.getRegInfo().invalidateLiveness();
449     MF.RenumberBlocks();
450     return true;
451   }
452 
453   return false;
454 }
455