1*0b57cec5SDimitry Andric //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric ///
9*0b57cec5SDimitry Andric /// \file
10*0b57cec5SDimitry Andric /// This file implements a pass that removes irreducible control flow.
11*0b57cec5SDimitry Andric /// Irreducible control flow means multiple-entry loops, which this pass
12*0b57cec5SDimitry Andric /// transforms to have a single entry.
13*0b57cec5SDimitry Andric ///
14*0b57cec5SDimitry Andric /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15*0b57cec5SDimitry Andric /// it linearizes control flow, turning diamonds into two triangles, which is
16*0b57cec5SDimitry Andric /// both unnecessary and undesirable for WebAssembly.
17*0b57cec5SDimitry Andric ///
18*0b57cec5SDimitry Andric /// The big picture: We recursively process each "region", defined as a group
19*0b57cec5SDimitry Andric /// of blocks with a single entry and no branches back to that entry. A region
20*0b57cec5SDimitry Andric /// may be the entire function body, or the inner part of a loop, i.e., the
21*0b57cec5SDimitry Andric /// loop's body without branches back to the loop entry. In each region we fix
22*0b57cec5SDimitry Andric /// up multi-entry loops by adding a new block that can dispatch to each of the
23*0b57cec5SDimitry Andric /// loop entries, based on the value of a label "helper" variable, and we
24*0b57cec5SDimitry Andric /// replace direct branches to the entries with assignments to the label
25*0b57cec5SDimitry Andric /// variable and a branch to the dispatch block. Then the dispatch block is the
26*0b57cec5SDimitry Andric /// single entry in the loop containing the previous multiple entries. After
27*0b57cec5SDimitry Andric /// ensuring all the loops in a region are reducible, we recurse into them. The
28*0b57cec5SDimitry Andric /// total time complexity of this pass is:
29*0b57cec5SDimitry Andric ///
30*0b57cec5SDimitry Andric ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31*0b57cec5SDimitry Andric ///     NumLoops * NumLoops)
32*0b57cec5SDimitry Andric ///
33*0b57cec5SDimitry Andric /// This pass is similar to what the Relooper [1] does. Both identify looping
34*0b57cec5SDimitry Andric /// code that requires multiple entries, and resolve it in a similar way (in
35*0b57cec5SDimitry Andric /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36*0b57cec5SDimitry Andric /// also that like the Relooper, we implement a "minimal" intervention: we only
37*0b57cec5SDimitry Andric /// use the "label" helper for the blocks we absolutely must and no others. We
38*0b57cec5SDimitry Andric /// also prioritize code size and do not duplicate code in order to resolve
39*0b57cec5SDimitry Andric /// irreducibility. The graph algorithms for finding loops and entries and so
40*0b57cec5SDimitry Andric /// forth are also similar to the Relooper. The main differences between this
41*0b57cec5SDimitry Andric /// pass and the Relooper are:
42*0b57cec5SDimitry Andric ///
43*0b57cec5SDimitry Andric ///  * We just care about irreducibility, so we just look at loops.
44*0b57cec5SDimitry Andric ///  * The Relooper emits structured control flow (with ifs etc.), while we
45*0b57cec5SDimitry Andric ///    emit a CFG.
46*0b57cec5SDimitry Andric ///
47*0b57cec5SDimitry Andric /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48*0b57cec5SDimitry Andric /// Proceedings of the ACM international conference companion on Object oriented
49*0b57cec5SDimitry Andric /// programming systems languages and applications companion (SPLASH '11). ACM,
50*0b57cec5SDimitry Andric /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51*0b57cec5SDimitry Andric /// http://doi.acm.org/10.1145/2048147.2048224
52*0b57cec5SDimitry Andric ///
53*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
54*0b57cec5SDimitry Andric 
55*0b57cec5SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56*0b57cec5SDimitry Andric #include "WebAssembly.h"
57*0b57cec5SDimitry Andric #include "WebAssemblySubtarget.h"
58*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
59*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
60*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
61*0b57cec5SDimitry Andric using namespace llvm;
62*0b57cec5SDimitry Andric 
63*0b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64*0b57cec5SDimitry Andric 
65*0b57cec5SDimitry Andric namespace {
66*0b57cec5SDimitry Andric 
67*0b57cec5SDimitry Andric using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68*0b57cec5SDimitry Andric using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69*0b57cec5SDimitry Andric 
getSortedEntries(const BlockSet & Entries)70*0b57cec5SDimitry Andric static BlockVector getSortedEntries(const BlockSet &Entries) {
71*0b57cec5SDimitry Andric   BlockVector SortedEntries(Entries.begin(), Entries.end());
72*0b57cec5SDimitry Andric   llvm::sort(SortedEntries,
73*0b57cec5SDimitry Andric              [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
74*0b57cec5SDimitry Andric                auto ANum = A->getNumber();
75*0b57cec5SDimitry Andric                auto BNum = B->getNumber();
76*0b57cec5SDimitry Andric                return ANum < BNum;
77*0b57cec5SDimitry Andric              });
78*0b57cec5SDimitry Andric   return SortedEntries;
79*0b57cec5SDimitry Andric }
80*0b57cec5SDimitry Andric 
81*0b57cec5SDimitry Andric // Calculates reachability in a region. Ignores branches to blocks outside of
82*0b57cec5SDimitry Andric // the region, and ignores branches to the region entry (for the case where
83*0b57cec5SDimitry Andric // the region is the inner part of a loop).
84*0b57cec5SDimitry Andric class ReachabilityGraph {
85*0b57cec5SDimitry Andric public:
ReachabilityGraph(MachineBasicBlock * Entry,const BlockSet & Blocks)86*0b57cec5SDimitry Andric   ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
87*0b57cec5SDimitry Andric       : Entry(Entry), Blocks(Blocks) {
88*0b57cec5SDimitry Andric #ifndef NDEBUG
89*0b57cec5SDimitry Andric     // The region must have a single entry.
90*0b57cec5SDimitry Andric     for (auto *MBB : Blocks) {
91*0b57cec5SDimitry Andric       if (MBB != Entry) {
92*0b57cec5SDimitry Andric         for (auto *Pred : MBB->predecessors()) {
93*0b57cec5SDimitry Andric           assert(inRegion(Pred));
94*0b57cec5SDimitry Andric         }
95*0b57cec5SDimitry Andric       }
96*0b57cec5SDimitry Andric     }
97*0b57cec5SDimitry Andric #endif
98*0b57cec5SDimitry Andric     calculate();
99*0b57cec5SDimitry Andric   }
100*0b57cec5SDimitry Andric 
canReach(MachineBasicBlock * From,MachineBasicBlock * To) const101*0b57cec5SDimitry Andric   bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
102*0b57cec5SDimitry Andric     assert(inRegion(From) && inRegion(To));
103*0b57cec5SDimitry Andric     auto I = Reachable.find(From);
104*0b57cec5SDimitry Andric     if (I == Reachable.end())
105*0b57cec5SDimitry Andric       return false;
106*0b57cec5SDimitry Andric     return I->second.count(To);
107*0b57cec5SDimitry Andric   }
108*0b57cec5SDimitry Andric 
109*0b57cec5SDimitry Andric   // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110*0b57cec5SDimitry Andric   // that can reach themselves.
getLoopers() const111*0b57cec5SDimitry Andric   const BlockSet &getLoopers() const { return Loopers; }
112*0b57cec5SDimitry Andric 
113*0b57cec5SDimitry Andric   // Get all blocks that are loop entries.
getLoopEntries() const114*0b57cec5SDimitry Andric   const BlockSet &getLoopEntries() const { return LoopEntries; }
115*0b57cec5SDimitry Andric 
116*0b57cec5SDimitry Andric   // Get all blocks that enter a particular loop from outside.
getLoopEnterers(MachineBasicBlock * LoopEntry) const117*0b57cec5SDimitry Andric   const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
118*0b57cec5SDimitry Andric     assert(inRegion(LoopEntry));
119*0b57cec5SDimitry Andric     auto I = LoopEnterers.find(LoopEntry);
120*0b57cec5SDimitry Andric     assert(I != LoopEnterers.end());
121*0b57cec5SDimitry Andric     return I->second;
122*0b57cec5SDimitry Andric   }
123*0b57cec5SDimitry Andric 
124*0b57cec5SDimitry Andric private:
125*0b57cec5SDimitry Andric   MachineBasicBlock *Entry;
126*0b57cec5SDimitry Andric   const BlockSet &Blocks;
127*0b57cec5SDimitry Andric 
128*0b57cec5SDimitry Andric   BlockSet Loopers, LoopEntries;
129*0b57cec5SDimitry Andric   DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130*0b57cec5SDimitry Andric 
inRegion(MachineBasicBlock * MBB) const131*0b57cec5SDimitry Andric   bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
132*0b57cec5SDimitry Andric 
133*0b57cec5SDimitry Andric   // Maps a block to all the other blocks it can reach.
134*0b57cec5SDimitry Andric   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135*0b57cec5SDimitry Andric 
calculate()136*0b57cec5SDimitry Andric   void calculate() {
137*0b57cec5SDimitry Andric     // Reachability computation work list. Contains pairs of recent additions
138*0b57cec5SDimitry Andric     // (A, B) where we just added a link A => B.
139*0b57cec5SDimitry Andric     using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
140*0b57cec5SDimitry Andric     SmallVector<BlockPair, 4> WorkList;
141*0b57cec5SDimitry Andric 
142*0b57cec5SDimitry Andric     // Add all relevant direct branches.
143*0b57cec5SDimitry Andric     for (auto *MBB : Blocks) {
144*0b57cec5SDimitry Andric       for (auto *Succ : MBB->successors()) {
145*0b57cec5SDimitry Andric         if (Succ != Entry && inRegion(Succ)) {
146*0b57cec5SDimitry Andric           Reachable[MBB].insert(Succ);
147*0b57cec5SDimitry Andric           WorkList.emplace_back(MBB, Succ);
148*0b57cec5SDimitry Andric         }
149*0b57cec5SDimitry Andric       }
150*0b57cec5SDimitry Andric     }
151*0b57cec5SDimitry Andric 
152*0b57cec5SDimitry Andric     while (!WorkList.empty()) {
153*0b57cec5SDimitry Andric       MachineBasicBlock *MBB, *Succ;
154*0b57cec5SDimitry Andric       std::tie(MBB, Succ) = WorkList.pop_back_val();
155*0b57cec5SDimitry Andric       assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156*0b57cec5SDimitry Andric       if (MBB != Entry) {
157*0b57cec5SDimitry Andric         // We recently added MBB => Succ, and that means we may have enabled
158*0b57cec5SDimitry Andric         // Pred => MBB => Succ.
159*0b57cec5SDimitry Andric         for (auto *Pred : MBB->predecessors()) {
160*0b57cec5SDimitry Andric           if (Reachable[Pred].insert(Succ).second) {
161*0b57cec5SDimitry Andric             WorkList.emplace_back(Pred, Succ);
162*0b57cec5SDimitry Andric           }
163*0b57cec5SDimitry Andric         }
164*0b57cec5SDimitry Andric       }
165*0b57cec5SDimitry Andric     }
166*0b57cec5SDimitry Andric 
167*0b57cec5SDimitry Andric     // Blocks that can return to themselves are in a loop.
168*0b57cec5SDimitry Andric     for (auto *MBB : Blocks) {
169*0b57cec5SDimitry Andric       if (canReach(MBB, MBB)) {
170*0b57cec5SDimitry Andric         Loopers.insert(MBB);
171*0b57cec5SDimitry Andric       }
172*0b57cec5SDimitry Andric     }
173*0b57cec5SDimitry Andric     assert(!Loopers.count(Entry));
174*0b57cec5SDimitry Andric 
175*0b57cec5SDimitry Andric     // Find the loop entries - loopers reachable from blocks not in that loop -
176*0b57cec5SDimitry Andric     // and those outside blocks that reach them, the "loop enterers".
177*0b57cec5SDimitry Andric     for (auto *Looper : Loopers) {
178*0b57cec5SDimitry Andric       for (auto *Pred : Looper->predecessors()) {
179*0b57cec5SDimitry Andric         // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180*0b57cec5SDimitry Andric         // otherwise, it is a block that enters into the loop.
181*0b57cec5SDimitry Andric         if (!canReach(Looper, Pred)) {
182*0b57cec5SDimitry Andric           LoopEntries.insert(Looper);
183*0b57cec5SDimitry Andric           LoopEnterers[Looper].insert(Pred);
184*0b57cec5SDimitry Andric         }
185*0b57cec5SDimitry Andric       }
186*0b57cec5SDimitry Andric     }
187*0b57cec5SDimitry Andric   }
188*0b57cec5SDimitry Andric };
189*0b57cec5SDimitry Andric 
190*0b57cec5SDimitry Andric // Finds the blocks in a single-entry loop, given the loop entry and the
191*0b57cec5SDimitry Andric // list of blocks that enter the loop.
192*0b57cec5SDimitry Andric class LoopBlocks {
193*0b57cec5SDimitry Andric public:
LoopBlocks(MachineBasicBlock * Entry,const BlockSet & Enterers)194*0b57cec5SDimitry Andric   LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
195*0b57cec5SDimitry Andric       : Entry(Entry), Enterers(Enterers) {
196*0b57cec5SDimitry Andric     calculate();
197*0b57cec5SDimitry Andric   }
198*0b57cec5SDimitry Andric 
getBlocks()199*0b57cec5SDimitry Andric   BlockSet &getBlocks() { return Blocks; }
200*0b57cec5SDimitry Andric 
201*0b57cec5SDimitry Andric private:
202*0b57cec5SDimitry Andric   MachineBasicBlock *Entry;
203*0b57cec5SDimitry Andric   const BlockSet &Enterers;
204*0b57cec5SDimitry Andric 
205*0b57cec5SDimitry Andric   BlockSet Blocks;
206*0b57cec5SDimitry Andric 
calculate()207*0b57cec5SDimitry Andric   void calculate() {
208*0b57cec5SDimitry Andric     // Going backwards from the loop entry, if we ignore the blocks entering
209*0b57cec5SDimitry Andric     // from outside, we will traverse all the blocks in the loop.
210*0b57cec5SDimitry Andric     BlockVector WorkList;
211*0b57cec5SDimitry Andric     BlockSet AddedToWorkList;
212*0b57cec5SDimitry Andric     Blocks.insert(Entry);
213*0b57cec5SDimitry Andric     for (auto *Pred : Entry->predecessors()) {
214*0b57cec5SDimitry Andric       if (!Enterers.count(Pred)) {
215*0b57cec5SDimitry Andric         WorkList.push_back(Pred);
216*0b57cec5SDimitry Andric         AddedToWorkList.insert(Pred);
217*0b57cec5SDimitry Andric       }
218*0b57cec5SDimitry Andric     }
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric     while (!WorkList.empty()) {
221*0b57cec5SDimitry Andric       auto *MBB = WorkList.pop_back_val();
222*0b57cec5SDimitry Andric       assert(!Enterers.count(MBB));
223*0b57cec5SDimitry Andric       if (Blocks.insert(MBB).second) {
224*0b57cec5SDimitry Andric         for (auto *Pred : MBB->predecessors()) {
225*0b57cec5SDimitry Andric           if (AddedToWorkList.insert(Pred).second)
226*0b57cec5SDimitry Andric             WorkList.push_back(Pred);
227*0b57cec5SDimitry Andric         }
228*0b57cec5SDimitry Andric       }
229*0b57cec5SDimitry Andric     }
230*0b57cec5SDimitry Andric   }
231*0b57cec5SDimitry Andric };
232*0b57cec5SDimitry Andric 
233*0b57cec5SDimitry Andric class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
getPassName() const234*0b57cec5SDimitry Andric   StringRef getPassName() const override {
235*0b57cec5SDimitry Andric     return "WebAssembly Fix Irreducible Control Flow";
236*0b57cec5SDimitry Andric   }
237*0b57cec5SDimitry Andric 
238*0b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
239*0b57cec5SDimitry Andric 
240*0b57cec5SDimitry Andric   bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241*0b57cec5SDimitry Andric                      MachineFunction &MF);
242*0b57cec5SDimitry Andric 
243*0b57cec5SDimitry Andric   void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244*0b57cec5SDimitry Andric                            MachineFunction &MF, const ReachabilityGraph &Graph);
245*0b57cec5SDimitry Andric 
246*0b57cec5SDimitry Andric public:
247*0b57cec5SDimitry Andric   static char ID; // Pass identification, replacement for typeid
WebAssemblyFixIrreducibleControlFlow()248*0b57cec5SDimitry Andric   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
249*0b57cec5SDimitry Andric };
250*0b57cec5SDimitry Andric 
processRegion(MachineBasicBlock * Entry,BlockSet & Blocks,MachineFunction & MF)251*0b57cec5SDimitry Andric bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252*0b57cec5SDimitry Andric     MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253*0b57cec5SDimitry Andric   bool Changed = false;
254*0b57cec5SDimitry Andric   // Remove irreducibility before processing child loops, which may take
255*0b57cec5SDimitry Andric   // multiple iterations.
256*0b57cec5SDimitry Andric   while (true) {
257*0b57cec5SDimitry Andric     ReachabilityGraph Graph(Entry, Blocks);
258*0b57cec5SDimitry Andric 
259*0b57cec5SDimitry Andric     bool FoundIrreducibility = false;
260*0b57cec5SDimitry Andric 
261*0b57cec5SDimitry Andric     for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
262*0b57cec5SDimitry Andric       // Find mutual entries - all entries which can reach this one, and
263*0b57cec5SDimitry Andric       // are reached by it (that always includes LoopEntry itself). All mutual
264*0b57cec5SDimitry Andric       // entries must be in the same loop, so if we have more than one, then we
265*0b57cec5SDimitry Andric       // have irreducible control flow.
266*0b57cec5SDimitry Andric       //
267*0b57cec5SDimitry Andric       // (Note that we need to sort the entries here, as otherwise the order can
268*0b57cec5SDimitry Andric       // matter: being mutual is a symmetric relationship, and each set of
269*0b57cec5SDimitry Andric       // mutuals will be handled properly no matter which we see first. However,
270*0b57cec5SDimitry Andric       // there can be multiple disjoint sets of mutuals, and which we process
271*0b57cec5SDimitry Andric       // first changes the output.)
272*0b57cec5SDimitry Andric       //
273*0b57cec5SDimitry Andric       // Note that irreducibility may involve inner loops, e.g. imagine A
274*0b57cec5SDimitry Andric       // starts one loop, and it has B inside it which starts an inner loop.
275*0b57cec5SDimitry Andric       // If we add a branch from all the way on the outside to B, then in a
276*0b57cec5SDimitry Andric       // sense B is no longer an "inner" loop, semantically speaking. We will
277*0b57cec5SDimitry Andric       // fix that irreducibility by adding a block that dispatches to either
278*0b57cec5SDimitry Andric       // either A or B, so B will no longer be an inner loop in our output.
279*0b57cec5SDimitry Andric       // (A fancier approach might try to keep it as such.)
280*0b57cec5SDimitry Andric       //
281*0b57cec5SDimitry Andric       // Note that we still need to recurse into inner loops later, to handle
282*0b57cec5SDimitry Andric       // the case where the irreducibility is entirely nested - we would not
283*0b57cec5SDimitry Andric       // be able to identify that at this point, since the enclosing loop is
284*0b57cec5SDimitry Andric       // a group of blocks all of whom can reach each other. (We'll see the
285*0b57cec5SDimitry Andric       // irreducibility after removing branches to the top of that enclosing
286*0b57cec5SDimitry Andric       // loop.)
287*0b57cec5SDimitry Andric       BlockSet MutualLoopEntries;
288*0b57cec5SDimitry Andric       MutualLoopEntries.insert(LoopEntry);
289*0b57cec5SDimitry Andric       for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290*0b57cec5SDimitry Andric         if (OtherLoopEntry != LoopEntry &&
291*0b57cec5SDimitry Andric             Graph.canReach(LoopEntry, OtherLoopEntry) &&
292*0b57cec5SDimitry Andric             Graph.canReach(OtherLoopEntry, LoopEntry)) {
293*0b57cec5SDimitry Andric           MutualLoopEntries.insert(OtherLoopEntry);
294*0b57cec5SDimitry Andric         }
295*0b57cec5SDimitry Andric       }
296*0b57cec5SDimitry Andric 
297*0b57cec5SDimitry Andric       if (MutualLoopEntries.size() > 1) {
298*0b57cec5SDimitry Andric         makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
299*0b57cec5SDimitry Andric         FoundIrreducibility = true;
300*0b57cec5SDimitry Andric         Changed = true;
301*0b57cec5SDimitry Andric         break;
302*0b57cec5SDimitry Andric       }
303*0b57cec5SDimitry Andric     }
304*0b57cec5SDimitry Andric     // Only go on to actually process the inner loops when we are done
305*0b57cec5SDimitry Andric     // removing irreducible control flow and changing the graph. Modifying
306*0b57cec5SDimitry Andric     // the graph as we go is possible, and that might let us avoid looking at
307*0b57cec5SDimitry Andric     // the already-fixed loops again if we are careful, but all that is
308*0b57cec5SDimitry Andric     // complex and bug-prone. Since irreducible loops are rare, just starting
309*0b57cec5SDimitry Andric     // another iteration is best.
310*0b57cec5SDimitry Andric     if (FoundIrreducibility) {
311*0b57cec5SDimitry Andric       continue;
312*0b57cec5SDimitry Andric     }
313*0b57cec5SDimitry Andric 
314*0b57cec5SDimitry Andric     for (auto *LoopEntry : Graph.getLoopEntries()) {
315*0b57cec5SDimitry Andric       LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316*0b57cec5SDimitry Andric       // Each of these calls to processRegion may change the graph, but are
317*0b57cec5SDimitry Andric       // guaranteed not to interfere with each other. The only changes we make
318*0b57cec5SDimitry Andric       // to the graph are to add blocks on the way to a loop entry. As the
319*0b57cec5SDimitry Andric       // loops are disjoint, that means we may only alter branches that exit
320*0b57cec5SDimitry Andric       // another loop, which are ignored when recursing into that other loop
321*0b57cec5SDimitry Andric       // anyhow.
322*0b57cec5SDimitry Andric       if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
323*0b57cec5SDimitry Andric         Changed = true;
324*0b57cec5SDimitry Andric       }
325*0b57cec5SDimitry Andric     }
326*0b57cec5SDimitry Andric 
327*0b57cec5SDimitry Andric     return Changed;
328*0b57cec5SDimitry Andric   }
329*0b57cec5SDimitry Andric }
330*0b57cec5SDimitry Andric 
331*0b57cec5SDimitry Andric // Given a set of entries to a single loop, create a single entry for that
332*0b57cec5SDimitry Andric // loop by creating a dispatch block for them, routing control flow using
333*0b57cec5SDimitry Andric // a helper variable. Also updates Blocks with any new blocks created, so
334*0b57cec5SDimitry Andric // that we properly track all the blocks in the region. But this does not update
335*0b57cec5SDimitry Andric // ReachabilityGraph; this will be updated in the caller of this function as
336*0b57cec5SDimitry Andric // needed.
makeSingleEntryLoop(BlockSet & Entries,BlockSet & Blocks,MachineFunction & MF,const ReachabilityGraph & Graph)337*0b57cec5SDimitry Andric void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338*0b57cec5SDimitry Andric     BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
339*0b57cec5SDimitry Andric     const ReachabilityGraph &Graph) {
340*0b57cec5SDimitry Andric   assert(Entries.size() >= 2);
341*0b57cec5SDimitry Andric 
342*0b57cec5SDimitry Andric   // Sort the entries to ensure a deterministic build.
343*0b57cec5SDimitry Andric   BlockVector SortedEntries = getSortedEntries(Entries);
344*0b57cec5SDimitry Andric 
345*0b57cec5SDimitry Andric #ifndef NDEBUG
346*0b57cec5SDimitry Andric   for (auto *Block : SortedEntries)
347*0b57cec5SDimitry Andric     assert(Block->getNumber() != -1);
348*0b57cec5SDimitry Andric   if (SortedEntries.size() > 1) {
349*0b57cec5SDimitry Andric     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
350*0b57cec5SDimitry Andric          ++I) {
351*0b57cec5SDimitry Andric       auto ANum = (*I)->getNumber();
352*0b57cec5SDimitry Andric       auto BNum = (*(std::next(I)))->getNumber();
353*0b57cec5SDimitry Andric       assert(ANum != BNum);
354*0b57cec5SDimitry Andric     }
355*0b57cec5SDimitry Andric   }
356*0b57cec5SDimitry Andric #endif
357*0b57cec5SDimitry Andric 
358*0b57cec5SDimitry Andric   // Create a dispatch block which will contain a jump table to the entries.
359*0b57cec5SDimitry Andric   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
360*0b57cec5SDimitry Andric   MF.insert(MF.end(), Dispatch);
361*0b57cec5SDimitry Andric   Blocks.insert(Dispatch);
362*0b57cec5SDimitry Andric 
363*0b57cec5SDimitry Andric   // Add the jump table.
364*0b57cec5SDimitry Andric   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
365*0b57cec5SDimitry Andric   MachineInstrBuilder MIB =
366*0b57cec5SDimitry Andric       BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
367*0b57cec5SDimitry Andric 
368*0b57cec5SDimitry Andric   // Add the register which will be used to tell the jump table which block to
369*0b57cec5SDimitry Andric   // jump to.
370*0b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
371*0b57cec5SDimitry Andric   Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
372*0b57cec5SDimitry Andric   MIB.addReg(Reg);
373*0b57cec5SDimitry Andric 
374*0b57cec5SDimitry Andric   // Compute the indices in the superheader, one for each bad block, and
375*0b57cec5SDimitry Andric   // add them as successors.
376*0b57cec5SDimitry Andric   DenseMap<MachineBasicBlock *, unsigned> Indices;
377*0b57cec5SDimitry Andric   for (auto *Entry : SortedEntries) {
378*0b57cec5SDimitry Andric     auto Pair = Indices.insert(std::make_pair(Entry, 0));
379*0b57cec5SDimitry Andric     assert(Pair.second);
380*0b57cec5SDimitry Andric 
381*0b57cec5SDimitry Andric     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
382*0b57cec5SDimitry Andric     Pair.first->second = Index;
383*0b57cec5SDimitry Andric 
384*0b57cec5SDimitry Andric     MIB.addMBB(Entry);
385*0b57cec5SDimitry Andric     Dispatch->addSuccessor(Entry);
386*0b57cec5SDimitry Andric   }
387*0b57cec5SDimitry Andric 
388*0b57cec5SDimitry Andric   // Rewrite the problematic successors for every block that wants to reach
389*0b57cec5SDimitry Andric   // the bad blocks. For simplicity, we just introduce a new block for every
390*0b57cec5SDimitry Andric   // edge we need to rewrite. (Fancier things are possible.)
391*0b57cec5SDimitry Andric 
392*0b57cec5SDimitry Andric   BlockVector AllPreds;
393*0b57cec5SDimitry Andric   for (auto *Entry : SortedEntries) {
394*0b57cec5SDimitry Andric     for (auto *Pred : Entry->predecessors()) {
395*0b57cec5SDimitry Andric       if (Pred != Dispatch) {
396*0b57cec5SDimitry Andric         AllPreds.push_back(Pred);
397*0b57cec5SDimitry Andric       }
398*0b57cec5SDimitry Andric     }
399*0b57cec5SDimitry Andric   }
400*0b57cec5SDimitry Andric 
401*0b57cec5SDimitry Andric   // This set stores predecessors within this loop.
402*0b57cec5SDimitry Andric   DenseSet<MachineBasicBlock *> InLoop;
403*0b57cec5SDimitry Andric   for (auto *Pred : AllPreds) {
404*0b57cec5SDimitry Andric     for (auto *Entry : Pred->successors()) {
405*0b57cec5SDimitry Andric       if (!Entries.count(Entry))
406*0b57cec5SDimitry Andric         continue;
407*0b57cec5SDimitry Andric       if (Graph.canReach(Entry, Pred)) {
408*0b57cec5SDimitry Andric         InLoop.insert(Pred);
409*0b57cec5SDimitry Andric         break;
410*0b57cec5SDimitry Andric       }
411*0b57cec5SDimitry Andric     }
412*0b57cec5SDimitry Andric   }
413*0b57cec5SDimitry Andric 
414*0b57cec5SDimitry Andric   // Record if each entry has a layout predecessor. This map stores
415*0b57cec5SDimitry Andric   // <<loop entry, Predecessor is within the loop?>, layout predecessor>
416*0b57cec5SDimitry Andric   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
417*0b57cec5SDimitry Andric       EntryToLayoutPred;
418*0b57cec5SDimitry Andric   for (auto *Pred : AllPreds) {
419*0b57cec5SDimitry Andric     bool PredInLoop = InLoop.count(Pred);
420*0b57cec5SDimitry Andric     for (auto *Entry : Pred->successors())
421*0b57cec5SDimitry Andric       if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
422*0b57cec5SDimitry Andric         EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
423*0b57cec5SDimitry Andric   }
424*0b57cec5SDimitry Andric 
425*0b57cec5SDimitry Andric   // We need to create at most two routing blocks per entry: one for
426*0b57cec5SDimitry Andric   // predecessors outside the loop and one for predecessors inside the loop.
427*0b57cec5SDimitry Andric   // This map stores
428*0b57cec5SDimitry Andric   // <<loop entry, Predecessor is within the loop?>, routing block>
429*0b57cec5SDimitry Andric   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
430*0b57cec5SDimitry Andric       Map;
431*0b57cec5SDimitry Andric   for (auto *Pred : AllPreds) {
432*0b57cec5SDimitry Andric     bool PredInLoop = InLoop.count(Pred);
433*0b57cec5SDimitry Andric     for (auto *Entry : Pred->successors()) {
434*0b57cec5SDimitry Andric       if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
435*0b57cec5SDimitry Andric         continue;
436*0b57cec5SDimitry Andric       // If there exists a layout predecessor of this entry and this predecessor
437*0b57cec5SDimitry Andric       // is not that, we rather create a routing block after that layout
438*0b57cec5SDimitry Andric       // predecessor to save a branch.
439*0b57cec5SDimitry Andric       if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
440*0b57cec5SDimitry Andric         if (OtherPred != Pred)
441*0b57cec5SDimitry Andric           continue;
442*0b57cec5SDimitry Andric 
443*0b57cec5SDimitry Andric       // This is a successor we need to rewrite.
444*0b57cec5SDimitry Andric       MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
445*0b57cec5SDimitry Andric       MF.insert(Pred->isLayoutSuccessor(Entry)
446*0b57cec5SDimitry Andric                     ? MachineFunction::iterator(Entry)
447*0b57cec5SDimitry Andric                     : MF.end(),
448*0b57cec5SDimitry Andric                 Routing);
449*0b57cec5SDimitry Andric       Blocks.insert(Routing);
450*0b57cec5SDimitry Andric 
451*0b57cec5SDimitry Andric       // Set the jump table's register of the index of the block we wish to
452*0b57cec5SDimitry Andric       // jump to, and jump to the jump table.
453*0b57cec5SDimitry Andric       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
454*0b57cec5SDimitry Andric           .addImm(Indices[Entry]);
455*0b57cec5SDimitry Andric       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
456*0b57cec5SDimitry Andric       Routing->addSuccessor(Dispatch);
457*0b57cec5SDimitry Andric       Map[{Entry, PredInLoop}] = Routing;
458*0b57cec5SDimitry Andric     }
459*0b57cec5SDimitry Andric   }
460*0b57cec5SDimitry Andric 
461*0b57cec5SDimitry Andric   for (auto *Pred : AllPreds) {
462*0b57cec5SDimitry Andric     bool PredInLoop = InLoop.count(Pred);
463*0b57cec5SDimitry Andric     // Remap the terminator operands and the successor list.
464*0b57cec5SDimitry Andric     for (MachineInstr &Term : Pred->terminators())
465*0b57cec5SDimitry Andric       for (auto &Op : Term.explicit_uses())
466*0b57cec5SDimitry Andric         if (Op.isMBB() && Indices.count(Op.getMBB()))
467*0b57cec5SDimitry Andric           Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
468*0b57cec5SDimitry Andric 
469*0b57cec5SDimitry Andric     for (auto *Succ : Pred->successors()) {
470*0b57cec5SDimitry Andric       if (!Entries.count(Succ))
471*0b57cec5SDimitry Andric         continue;
472*0b57cec5SDimitry Andric       auto *Routing = Map[{Succ, PredInLoop}];
473*0b57cec5SDimitry Andric       Pred->replaceSuccessor(Succ, Routing);
474*0b57cec5SDimitry Andric     }
475*0b57cec5SDimitry Andric   }
476*0b57cec5SDimitry Andric 
477*0b57cec5SDimitry Andric   // Create a fake default label, because br_table requires one.
478*0b57cec5SDimitry Andric   MIB.addMBB(MIB.getInstr()
479*0b57cec5SDimitry Andric                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
480*0b57cec5SDimitry Andric                  .getMBB());
481*0b57cec5SDimitry Andric }
482*0b57cec5SDimitry Andric 
483*0b57cec5SDimitry Andric } // end anonymous namespace
484*0b57cec5SDimitry Andric 
485*0b57cec5SDimitry Andric char WebAssemblyFixIrreducibleControlFlow::ID = 0;
486*0b57cec5SDimitry Andric INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
487*0b57cec5SDimitry Andric                 "Removes irreducible control flow", false, false)
488*0b57cec5SDimitry Andric 
createWebAssemblyFixIrreducibleControlFlow()489*0b57cec5SDimitry Andric FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
490*0b57cec5SDimitry Andric   return new WebAssemblyFixIrreducibleControlFlow();
491*0b57cec5SDimitry Andric }
492*0b57cec5SDimitry Andric 
493*0b57cec5SDimitry Andric // Test whether the given register has an ARGUMENT def.
hasArgumentDef(unsigned Reg,const MachineRegisterInfo & MRI)494*0b57cec5SDimitry Andric static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495*0b57cec5SDimitry Andric   for (const auto &Def : MRI.def_instructions(Reg))
496*0b57cec5SDimitry Andric     if (WebAssembly::isArgument(Def.getOpcode()))
497*0b57cec5SDimitry Andric       return true;
498*0b57cec5SDimitry Andric   return false;
499*0b57cec5SDimitry Andric }
500*0b57cec5SDimitry Andric 
501*0b57cec5SDimitry Andric // Add a register definition with IMPLICIT_DEFs for every register to cover for
502 // register uses that don't have defs in every possible path.
503 // TODO: This is fairly heavy-handed; find a better approach.
addImplicitDefs(MachineFunction & MF)504 static void addImplicitDefs(MachineFunction &MF) {
505   const MachineRegisterInfo &MRI = MF.getRegInfo();
506   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507   MachineBasicBlock &Entry = *MF.begin();
508   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
509     Register Reg = Register::index2VirtReg(I);
510 
511     // Skip unused registers.
512     if (MRI.use_nodbg_empty(Reg))
513       continue;
514 
515     // Skip registers that have an ARGUMENT definition.
516     if (hasArgumentDef(Reg, MRI))
517       continue;
518 
519     BuildMI(Entry, Entry.begin(), DebugLoc(),
520             TII.get(WebAssembly::IMPLICIT_DEF), Reg);
521   }
522 
523   // Move ARGUMENT_* instructions to the top of the entry block, so that their
524   // liveness reflects the fact that these really are live-in values.
525   for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
526     if (WebAssembly::isArgument(MI.getOpcode())) {
527       MI.removeFromParent();
528       Entry.insert(Entry.begin(), &MI);
529     }
530   }
531 }
532 
runOnMachineFunction(MachineFunction & MF)533 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534     MachineFunction &MF) {
535   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536                        "********** Function: "
537                     << MF.getName() << '\n');
538 
539   // Start the recursive process on the entire function body.
540   BlockSet AllBlocks;
541   for (auto &MBB : MF) {
542     AllBlocks.insert(&MBB);
543   }
544 
545   if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546     // We rewrote part of the function; recompute relevant things.
547     MF.RenumberBlocks();
548     // Now we've inserted dispatch blocks, some register uses can have incoming
549     // paths without a def. For example, before this pass register %a was
550     // defined in BB1 and used in BB2, and there was only one path from BB1 and
551     // BB2. But if this pass inserts a dispatch block having multiple
552     // predecessors between the two BBs, now there are paths to BB2 without
553     // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554     // IMPLICIT_DEFs to all regs is one simple way to fix it.
555     addImplicitDefs(MF);
556     return true;
557   }
558 
559   return false;
560 }
561