1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a register stacking pass.
12 ///
13 /// This pass reorders instructions to put register uses and defs in an order
14 /// such that they form single-use expression trees. Registers fitting this form
15 /// are then marked as "stackified", meaning references to them are replaced by
16 /// "push" and "pop" from the stack.
17 ///
18 /// This is primarily a code size optimization, since temporary values on the
19 /// expression don't need to be named.
20 ///
21 //===----------------------------------------------------------------------===//
22 
23 #include "WebAssembly.h"
24 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
29 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
30 #include "llvm/CodeGen/MachineDominators.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "wasm-reg-stackify"
39 
namespace {
/// Pass that reorders instructions within basic blocks so single-use virtual
/// registers form expression trees, then marks those registers "stackified"
/// so they become implicit wasm push/pop operands.
class WebAssemblyRegStackify final : public MachineFunctionPass {
  const char *getPassName() const override {
    return "WebAssembly Register Stackify";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Instructions are only moved within blocks, so the CFG is unchanged.
    AU.setPreservesCFG();
    // Used by IsSafeToMove to check invariant loads / side effects.
    AU.addRequired<AAResultsWrapperPass>();
    // Used by OneUseDominatesOtherUses for the tee_local transformation.
    AU.addRequired<MachineDominatorTree>();
    // Used throughout to reason about value numbers when moving defs.
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace
66 
char WebAssemblyRegStackify::ID = 0;
/// Factory function used by the WebAssembly target to add this pass to the
/// codegen pipeline.
FunctionPass *llvm::createWebAssemblyRegStackify() {
  return new WebAssemblyRegStackify();
}
71 
72 // Decorate the given instruction with implicit operands that enforce the
73 // expression stack ordering constraints for an instruction which is on
74 // the expression stack.
75 static void ImposeStackOrdering(MachineInstr *MI) {
76   // Write the opaque EXPR_STACK register.
77   if (!MI->definesRegister(WebAssembly::EXPR_STACK))
78     MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
79                                              /*isDef=*/true,
80                                              /*isImp=*/true));
81 
82   // Also read the opaque EXPR_STACK register.
83   if (!MI->readsRegister(WebAssembly::EXPR_STACK))
84     MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
85                                              /*isDef=*/false,
86                                              /*isImp=*/true));
87 }
88 
89 // Test whether it's safe to move Def to just before Insert.
90 // TODO: Compute memory dependencies in a way that doesn't require always
91 // walking the block.
92 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
93 // more precise.
94 static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
95                          AliasAnalysis &AA, const LiveIntervals &LIS,
96                          const MachineRegisterInfo &MRI) {
97   assert(Def->getParent() == Insert->getParent());
98   bool SawStore = false, SawSideEffects = false;
99   MachineBasicBlock::const_iterator D(Def), I(Insert);
100 
101   // Check for register dependencies.
102   for (const MachineOperand &MO : Def->operands()) {
103     if (!MO.isReg() || MO.isUndef())
104       continue;
105     unsigned Reg = MO.getReg();
106 
107     // If the register is dead here and at Insert, ignore it.
108     if (MO.isDead() && Insert->definesRegister(Reg) &&
109         !Insert->readsRegister(Reg))
110       continue;
111 
112     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
113       // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
114       // from moving down, and we've already checked for that.
115       if (Reg == WebAssembly::ARGUMENTS)
116         continue;
117       // If the physical register is never modified, ignore it.
118       if (!MRI.isPhysRegModified(Reg))
119         continue;
120       // Otherwise, it's a physical register with unknown liveness.
121       return false;
122     }
123 
124     // Ask LiveIntervals whether moving this virtual register use or def to
125     // Insert will change which value numbers are seen.
126     const LiveInterval &LI = LIS.getInterval(Reg);
127     VNInfo *DefVNI =
128         MO.isDef() ? LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot())
129                    : LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
130     assert(DefVNI && "Instruction input missing value number");
131     VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
132     if (InsVNI && DefVNI != InsVNI)
133       return false;
134   }
135 
136   SawStore = Def->isCall() || Def->mayStore();
137   // Check for memory dependencies and side effects.
138   for (--I; I != D; --I)
139     SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
140   return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
141          !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
142 }
143 
144 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
145 static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
146                                      const MachineBasicBlock &MBB,
147                                      const MachineRegisterInfo &MRI,
148                                      const MachineDominatorTree &MDT,
149                                      LiveIntervals &LIS) {
150   const LiveInterval &LI = LIS.getInterval(Reg);
151 
152   const MachineInstr *OneUseInst = OneUse.getParent();
153   VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
154 
155   for (const MachineOperand &Use : MRI.use_operands(Reg)) {
156     if (&Use == &OneUse)
157       continue;
158 
159     const MachineInstr *UseInst = Use.getParent();
160     VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
161 
162     if (UseVNI != OneUseVNI)
163       continue;
164 
165     const MachineInstr *OneUseInst = OneUse.getParent();
166     if (UseInst->getOpcode() == TargetOpcode::PHI) {
167       // Test that the PHI use, which happens on the CFG edge rather than
168       // within the PHI's own block, is dominated by the one selected use.
169       const MachineBasicBlock *Pred =
170           UseInst->getOperand(&Use - &UseInst->getOperand(0) + 1).getMBB();
171       if (!MDT.dominates(&MBB, Pred))
172         return false;
173     } else if (UseInst == OneUseInst) {
174       // Another use in the same instruction. We need to ensure that the one
175       // selected use happens "before" it.
176       if (&OneUse > &Use)
177         return false;
178     } else {
179       // Test that the use is dominated by the one selected use.
180       if (!MDT.dominates(OneUseInst, UseInst))
181         return false;
182     }
183   }
184   return true;
185 }
186 
187 /// Get the appropriate tee_local opcode for the given register class.
188 static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
189   if (RC == &WebAssembly::I32RegClass)
190     return WebAssembly::TEE_LOCAL_I32;
191   if (RC == &WebAssembly::I64RegClass)
192     return WebAssembly::TEE_LOCAL_I64;
193   if (RC == &WebAssembly::F32RegClass)
194     return WebAssembly::TEE_LOCAL_F32;
195   if (RC == &WebAssembly::F64RegClass)
196     return WebAssembly::TEE_LOCAL_F64;
197   llvm_unreachable("Unexpected register class");
198 }
199 
200 /// A single-use def in the same block with no intervening memory or register
201 /// dependencies; move the def down and nest it with the current instruction.
202 static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
203                                       MachineInstr *Def,
204                                       MachineBasicBlock &MBB,
205                                       MachineInstr *Insert, LiveIntervals &LIS,
206                                       WebAssemblyFunctionInfo &MFI,
207                                       MachineRegisterInfo &MRI) {
208   MBB.splice(Insert, &MBB, Def);
209   LIS.handleMove(*Def);
210 
211   if (MRI.hasOneDef(Reg)) {
212     MFI.stackifyVReg(Reg);
213   } else {
214     unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
215     Def->getOperand(0).setReg(NewReg);
216     Op.setReg(NewReg);
217 
218     // Tell LiveIntervals about the new register.
219     LIS.createAndComputeVirtRegInterval(NewReg);
220 
221     // Tell LiveIntervals about the changes to the old register.
222     LiveInterval &LI = LIS.getInterval(Reg);
223     LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
224     LIS.shrinkToUses(&LI);
225 
226     MFI.stackifyVReg(NewReg);
227   }
228 
229   ImposeStackOrdering(Def);
230   return Def;
231 }
232 
/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
///
/// Returns the clone, which becomes the new insertion point for the tree walk.
static MachineInstr *
RematerializeCheapDef(unsigned Reg, MachineOperand &Op, MachineInstr *Def,
                      MachineBasicBlock &MBB, MachineInstr *Insert,
                      LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
                      MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII,
                      const WebAssemblyRegisterInfo *TRI) {
  // Rematerialize Def just before Insert into a fresh register, and redirect
  // the current operand to the clone's result.
  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
  Op.setReg(NewReg);
  // reMaterialize inserts the clone immediately before Insert.
  MachineInstr *Clone = &*std::prev(MachineBasicBlock::instr_iterator(Insert));
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(NewReg);
  ImposeStackOrdering(Clone);

  // Shrink the interval.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    LIS.shrinkToUses(&LI);
    // After shrinking, the original def is dead if its value is not live
    // immediately past the def slot.
    IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  if (IsDead) {
    SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
    // NOTE(review): this clears any ARGUMENTS def recorded at Def's slot
    // before erasing Def — presumably Def can carry an implicit ARGUMENTS
    // operand whose LiveIntervals entry must go too; confirm against how
    // ARGUMENTS operands are attached elsewhere in the backend.
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(*Def);
    Def->eraseFromParent();
  }

  return Clone;
}
269 
/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee_local to satisfy the rest of the uses. As an illustration,
/// rewrite this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, NewReg = TEE_LOCAL_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., NewReg, ...
///    INST ..., NewReg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a get_local from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  // Move Def down to just before Insert.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);
  const auto *RegClass = MRI.getRegClass(Reg);
  // NewReg takes over all remaining uses of Reg; TeeReg feeds Insert; DefReg
  // becomes Def's result, consumed only by the tee.
  unsigned NewReg = MRI.createVirtualRegister(RegClass);
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
  MRI.replaceRegWith(Reg, NewReg);
  // Build: TeeReg, NewReg = TEE_LOCAL_... DefReg (see diagram above).
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
                          .addReg(NewReg, RegState::Define)
                          .addReg(DefReg);
  Op.setReg(TeeReg);
  Def->getOperand(0).setReg(DefReg);
  // Update LiveIntervals: Reg no longer exists; compute fresh intervals for
  // the three new registers.
  LIS.InsertMachineInstrInMaps(*Tee);
  LIS.removeInterval(Reg);
  LIS.createAndComputeVirtRegInterval(NewReg);
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  // DefReg and TeeReg are single-use values carried on the expression stack.
  MFI.stackifyVReg(DefReg);
  MFI.stackifyVReg(TeeReg);
  ImposeStackOrdering(Def);
  ImposeStackOrdering(Tee);
  return Def;
}
318 
319 namespace {
320 /// A stack for walking the tree of instructions being built, visiting the
321 /// MachineOperands in DFS order.
322 class TreeWalkerState {
323   typedef MachineInstr::mop_iterator mop_iterator;
324   typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
325   typedef iterator_range<mop_reverse_iterator> RangeTy;
326   SmallVector<RangeTy, 4> Worklist;
327 
328 public:
329   explicit TreeWalkerState(MachineInstr *Insert) {
330     const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
331     if (Range.begin() != Range.end())
332       Worklist.push_back(reverse(Range));
333   }
334 
335   bool Done() const { return Worklist.empty(); }
336 
337   MachineOperand &Pop() {
338     RangeTy &Range = Worklist.back();
339     MachineOperand &Op = *Range.begin();
340     Range = drop_begin(Range, 1);
341     if (Range.begin() == Range.end())
342       Worklist.pop_back();
343     assert((Worklist.empty() ||
344             Worklist.back().begin() != Worklist.back().end()) &&
345            "Empty ranges shouldn't remain in the worklist");
346     return Op;
347   }
348 
349   /// Push Instr's operands onto the stack to be visited.
350   void PushOperands(MachineInstr *Instr) {
351     const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
352     if (Range.begin() != Range.end())
353       Worklist.push_back(reverse(Range));
354   }
355 
356   /// Some of Instr's operands are on the top of the stack; remove them and
357   /// re-insert them starting from the beginning (because we've commuted them).
358   void ResetTopOperands(MachineInstr *Instr) {
359     assert(HasRemainingOperands(Instr) &&
360            "Reseting operands should only be done when the instruction has "
361            "an operand still on the stack");
362     Worklist.back() = reverse(Instr->explicit_uses());
363   }
364 
365   /// Test whether Instr has operands remaining to be visited at the top of
366   /// the stack.
367   bool HasRemainingOperands(const MachineInstr *Instr) const {
368     if (Worklist.empty())
369       return false;
370     const RangeTy &Range = Worklist.back();
371     return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
372   }
373 
374   /// Test whether the given register is present on the stack, indicating an
375   /// operand in the tree that we haven't visited yet. Moving a definition of
376   /// Reg to a point in the tree after that would change its value.
377   bool IsOnStack(unsigned Reg) const {
378     for (const RangeTy &Range : Worklist)
379       for (const MachineOperand &MO : Range)
380         if (MO.isReg() && MO.getReg() == Reg)
381           return true;
382     return false;
383   }
384 };
385 
386 /// State to keep track of whether commuting is in flight or whether it's been
387 /// tried for the current instruction and didn't work.
388 class CommutingState {
389   /// There are effectively three states: the initial state where we haven't
390   /// started commuting anything and we don't know anything yet, the tenative
391   /// state where we've commuted the operands of the current instruction and are
392   /// revisting it, and the declined state where we've reverted the operands
393   /// back to their original order and will no longer commute it further.
394   bool TentativelyCommuting;
395   bool Declined;
396 
397   /// During the tentative state, these hold the operand indices of the commuted
398   /// operands.
399   unsigned Operand0, Operand1;
400 
401 public:
402   CommutingState() : TentativelyCommuting(false), Declined(false) {}
403 
404   /// Stackification for an operand was not successful due to ordering
405   /// constraints. If possible, and if we haven't already tried it and declined
406   /// it, commute Insert's operands and prepare to revisit it.
407   void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
408                     const WebAssemblyInstrInfo *TII) {
409     if (TentativelyCommuting) {
410       assert(!Declined &&
411              "Don't decline commuting until you've finished trying it");
412       // Commuting didn't help. Revert it.
413       TII->commuteInstruction(Insert, /*NewMI=*/false, Operand0, Operand1);
414       TentativelyCommuting = false;
415       Declined = true;
416     } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
417       Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
418       Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
419       if (TII->findCommutedOpIndices(Insert, Operand0, Operand1)) {
420         // Tentatively commute the operands and try again.
421         TII->commuteInstruction(Insert, /*NewMI=*/false, Operand0, Operand1);
422         TreeWalker.ResetTopOperands(Insert);
423         TentativelyCommuting = true;
424         Declined = false;
425       }
426     }
427   }
428 
429   /// Stackification for some operand was successful. Reset to the default
430   /// state.
431   void Reset() {
432     TentativelyCommuting = false;
433     Declined = false;
434   }
435 };
436 } // end anonymous namespace
437 
/// Main driver: for each block, walk instructions bottom-up and greedily grow
/// an expression tree rooted at each instruction by moving, rematerializing,
/// or teeing the defs of its operands.
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside a phi. PHIs sit at the top of the block,
      // so the reverse walk is finished once one is reached.
      if (Insert->getOpcode() == TargetOpcode::PHI)
        break;

      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        continue;

      // Ignore debugging intrinsics.
      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.Done()) {
        MachineOperand &Op = TreeWalker.Pop();

        // We're only interested in explicit virtual register operands.
        if (!Op.isReg())
          continue;

        unsigned Reg = Op.getReg();
        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Op.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;

        // Identify the definition for this register at this point. Most
        // registers are in SSA form here so we try a quick MRI query first.
        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
        if (!Def) {
          // MRI doesn't know what the Def is. Try asking LIS.
          const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
              LIS.getInstructionIndex(*Insert));
          if (!ValNo)
            continue;
          Def = LIS.getInstructionFromIndex(ValNo->def);
          if (!Def)
            continue;
        }

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Don't nest PHIs inside of anything.
        if (Def->getOpcode() == TargetOpcode::PHI)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
          continue;

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee_local.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = Def->getParent() == &MBB;
        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
                       !TreeWalker.IsOnStack(Reg);
        if (CanMove && MRI.hasOneUse(Reg)) {
          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
        } else if (Def->isAsCheapAsAMove() &&
                   TII->isTriviallyReMaterializable(Def, &AA)) {
          Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII, TRI);
        } else if (CanMove &&
                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.MaybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.Reset();
        TreeWalker.PushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        ImposeStackOrdering(&*MII);
        // Insert is now the deepest (earliest) instruction of the tree just
        // built; point MII at it so the outer ++MII resumes at the
        // instruction just above the whole tree.
        MII = std::prev(
            llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
        Changed = true;
      }
    }
  }

  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::EXPR_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue())
        continue;
      // Operands are scanned in reverse so pops are matched against the
      // most recent pushes.
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();

        if (MFI.isVRegStackified(Reg)) {
          if (MO.isDef())
            Stack.push_back(Reg);
          else
            assert(Stack.pop_back_val() == Reg &&
                   "Register stack pop should be paired with a push");
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}
605