//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a register stacking pass.
///
/// This pass reorders instructions to put register uses and defs in an order
/// such that they form single-use expression trees. Registers fitting this form
/// are then marked as "stackified", meaning references to them are replaced by
/// "push" and "pop" from the stack.
///
/// This is primarily a code size optimization, since temporary values on the
/// expression stack don't need to be named.
///
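/// For example, instead of materializing an intermediate value into a named
/// local and reading it back, an expression such as (a + 2) can have its
/// operand pushed and popped directly (schematically):
///
///    i32.const $push0=, 2
///    i32.add   $push1=, $0, $pop0
///
/// so the temporary never needs a get_local or set_local.
///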
//===----------------------------------------------------------------------===//

#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-reg-stackify"

namespace {
class WebAssemblyRegStackify final : public MachineFunctionPass {
  const char *getPassName() const override {
    return "WebAssembly Register Stackify";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace

char WebAssemblyRegStackify::ID = 0;
FunctionPass *llvm::createWebAssemblyRegStackify() {
  return new WebAssemblyRegStackify();
}

// Decorate the given instruction with implicit operands that enforce the
// expression stack ordering constraints for an instruction which is on
// the expression stack.
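// Because every instruction on the expression stack both defs and uses
// EXPR_STACK, later passes see a dependency between any two such instructions
// and cannot reorder them relative to one another.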
static void ImposeStackOrdering(MachineInstr *MI) {
  // Write the opaque EXPR_STACK register.
  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
                                             /*isDef=*/true,
                                             /*isImp=*/true));

  // Also read the opaque EXPR_STACK register.
  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
                                             /*isDef=*/false,
                                             /*isImp=*/true));
}

// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
static void QueryCallee(const MachineInstr *MI, unsigned CalleeOpNo,
                        bool &Read, bool &Write, bool &Effects,
                        bool &StackPointer) {
  // All calls can use the stack pointer.
  StackPointer = true;

  const MachineOperand &MO = MI->getOperand(CalleeOpNo);
  if (MO.isGlobal()) {
    const Constant *GV = MO.getGlobal();
    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
      if (!GA->isInterposable())
        GV = GA->getAliasee();

    if (const Function *F = dyn_cast<Function>(GV)) {
      if (!F->doesNotThrow())
        Effects = true;
      if (F->doesNotAccessMemory())
        return;
      if (F->onlyReadsMemory()) {
        Read = true;
        return;
      }
    }
  }

  // Assume the worst.
  Write = true;
  Read = true;
  Effects = true;
}

// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the __stack_pointer value.
static void Query(const MachineInstr *MI, AliasAnalysis &AA,
                  bool &Read, bool &Write, bool &Effects, bool &StackPointer) {
  assert(!MI->isPosition());
  assert(!MI->isTerminator());
  assert(!MI->isDebugValue());

  // Check for loads.
  if (MI->mayLoad() && !MI->isInvariantLoad(&AA))
    Read = true;

  // Check for stores.
  if (MI->mayStore()) {
    Write = true;

    // Check for stores to __stack_pointer.
    for (auto MMO : MI->memoperands()) {
      const MachinePointerInfo &MPI = MMO->getPointerInfo();
      if (MPI.V.is<const PseudoSourceValue *>()) {
        auto PSV = MPI.V.get<const PseudoSourceValue *>();
        if (const ExternalSymbolPseudoSourceValue *EPSV =
                dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
          if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
            StackPointer = true;
      }
    }
  } else if (MI->hasOrderedMemoryRef()) {
    switch (MI->getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid, so they can't be
      // arbitrarily moved; however, hasOrderedMemoryRef() interprets this plus
      // their lack of memoperands as having a potential unknown memory
      // reference.
      break;
    default:
      // Record potential stores, unless it's a call, as calls are handled
      // specially below.
      if (!MI->isCall())
        Write = true;
      break;
    }
  }

  // Check for side effects.
  if (MI->hasUnmodeledSideEffects()) {
    switch (MI->getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid, so they can't be
      // arbitrarily moved; however, in the specific case of register
      // stackifying, it is safe to move them because overflow and invalid are
      // Undefined Behavior.
      break;
    default:
      Effects = true;
      break;
    }
  }

  // Analyze calls.
  if (MI->isCall()) {
    switch (MI->getOpcode()) {
    case WebAssembly::CALL_VOID:
      QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
      break;
    case WebAssembly::CALL_I32:
    case WebAssembly::CALL_I64:
    case WebAssembly::CALL_F32:
    case WebAssembly::CALL_F64:
      QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
      break;
    case WebAssembly::CALL_INDIRECT_VOID:
    case WebAssembly::CALL_INDIRECT_I32:
    case WebAssembly::CALL_INDIRECT_I64:
    case WebAssembly::CALL_INDIRECT_F32:
    case WebAssembly::CALL_INDIRECT_F64:
      Read = true;
      Write = true;
      Effects = true;
      StackPointer = true;
      break;
    default:
      llvm_unreachable("unexpected call opcode");
    }
  }
}

// Test whether Def is safe and profitable to rematerialize.
static bool ShouldRematerialize(const MachineInstr *Def, AliasAnalysis &AA,
                                const WebAssemblyInstrInfo *TII) {
  return Def->isAsCheapAsAMove() &&
         TII->isTriviallyReMaterializable(Def, &AA);
}

// Identify the definition for this register at this point. This is a
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
// LiveIntervals to handle complex cases.
static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
                                const MachineRegisterInfo &MRI,
                                const LiveIntervals &LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
    return Def;

  // MRI doesn't know what the Def is. Try asking LIS.
  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
          LIS.getInstructionIndex(*Insert)))
    return LIS.getInstructionFromIndex(ValNo->def);

  return nullptr;
}

// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
static bool HasOneUse(unsigned Reg, MachineInstr *Def,
                      MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
                      LiveIntervals &LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MRI.hasOneUse(Reg))
    return true;

  bool HasOne = false;
  const LiveInterval &LI = LIS.getInterval(Reg);
  const VNInfo *DefVNI = LI.getVNInfoAt(
      LIS.getInstructionIndex(*Def).getRegSlot());
  assert(DefVNI);
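  // Walk all uses of Reg. For Def to have a single use, exactly one use may
  // be reached by Def's value, and that use must kill it so the value doesn't
  // live beyond it.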
  for (auto &I : MRI.use_operands(Reg)) {
    const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
    if (Result.valueIn() == DefVNI) {
      if (!Result.isKill())
        return false;
      if (HasOne)
        return false;
      HasOne = true;
    }
  }
  return HasOne;
}

// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA, const LiveIntervals &LIS,
                         const MachineRegisterInfo &MRI) {
  assert(Def->getParent() == Insert->getParent());

  // Check for register dependencies.
  for (const MachineOperand &MO : Def->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    unsigned Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg) &&
        !Insert->readsRegister(Reg))
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // Ask LiveIntervals whether moving this virtual register use or def to
    // Insert will change which value numbers are seen.
    //
    // If the operand is a use of a register that is also defined in the same
    // instruction, test that the newly defined value reaches the insert point,
    // since the operand will be moving along with the def.
    const LiveInterval &LI = LIS.getInterval(Reg);
    VNInfo *DefVNI =
        (MO.isDef() || Def->definesRegister(Reg)) ?
        LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
        LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
    assert(DefVNI && "Instruction input missing value number");
    VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
    if (InsVNI && DefVNI != InsVNI)
      return false;
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  Query(Def, AA, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  if (!Read && !Write && !Effects && !StackPointer)
    return true;

  // Scan through the intervening instructions between Def and Insert.
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    Query(&*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
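    // Conservatively treat any pairwise interference as unsafe: side effects
    // can't reorder with side effects, a read can't cross a write, a write
    // can't cross a read or a write, and __stack_pointer references can't
    // cross each other.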
    if (Effects && InterveningEffects)
      return false;
    if (Read && InterveningWrite)
      return false;
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    if (StackPointer && InterveningStackPointer)
      return false;
  }

  return true;
}

/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                     const MachineBasicBlock &MBB,
                                     const MachineRegisterInfo &MRI,
                                     const MachineDominatorTree &MDT,
                                     LiveIntervals &LIS) {
  const LiveInterval &LI = LIS.getInterval(Reg);

  const MachineInstr *OneUseInst = OneUse.getParent();
  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));

  for (const MachineOperand &Use : MRI.use_operands(Reg)) {
    if (&Use == &OneUse)
      continue;

    const MachineInstr *UseInst = Use.getParent();
    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));

    if (UseVNI != OneUseVNI)
      continue;

    if (UseInst == OneUseInst) {
      // Another use in the same instruction. We need to ensure that the one
      // selected use happens "before" it.
      if (&OneUse > &Use)
        return false;
    } else {
      // Test that the use is dominated by the one selected use.
      if (!MDT.dominates(OneUseInst, UseInst))
        return false;
    }
  }
  return true;
}

/// Get the appropriate tee_local opcode for the given register class.
static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
  if (RC == &WebAssembly::I32RegClass)
    return WebAssembly::TEE_LOCAL_I32;
  if (RC == &WebAssembly::I64RegClass)
    return WebAssembly::TEE_LOCAL_I64;
  if (RC == &WebAssembly::F32RegClass)
    return WebAssembly::TEE_LOCAL_F32;
  if (RC == &WebAssembly::F64RegClass)
    return WebAssembly::TEE_LOCAL_F64;
  llvm_unreachable("Unexpected register class");
}

// Shrink LI to its uses, cleaning up LI.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
  if (LIS.shrinkToUses(&LI)) {
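    // Shrinking may have left the interval split into multiple disconnected
    // components; separate them into distinct intervals.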
    SmallVector<LiveInterval*, 4> SplitLIs;
    LIS.splitSeparateComponents(LI, SplitLIs);
  }
}

/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
                                      MachineInstr *Def,
                                      MachineBasicBlock &MBB,
                                      MachineInstr *Insert, LiveIntervals &LIS,
                                      WebAssemblyFunctionInfo &MFI,
                                      MachineRegisterInfo &MRI) {
  DEBUG(dbgs() << "Move for single use: "; Def->dump());

  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
    // No one else is using this register for anything so we can just stackify
    // it in place.
    MFI.stackifyVReg(Reg);
  } else {
    // The register may have unrelated uses or defs; create a new register for
    // just our one def and use so that we can stackify it.
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
    Def->getOperand(0).setReg(NewReg);
    Op.setReg(NewReg);

    // Tell LiveIntervals about the new register.
    LIS.createAndComputeVirtRegInterval(NewReg);

    // Tell LiveIntervals about the changes to the old register.
    LiveInterval &LI = LIS.getInterval(Reg);
    LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*Def).getRegSlot());
    ShrinkToUses(LI, LIS);

    MFI.stackifyVReg(NewReg);

    DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  }

  ImposeStackOrdering(Def);
  return Def;
}

/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
static MachineInstr *
RematerializeCheapDef(unsigned Reg, MachineOperand &Op, MachineInstr *Def,
                      MachineBasicBlock &MBB, MachineInstr *Insert,
                      LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
                      MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII,
                      const WebAssemblyRegisterInfo *TRI) {
  DEBUG(dbgs() << "Rematerializing cheap def: "; Def->dump());
  DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
  Op.setReg(NewReg);
  MachineInstr *Clone = &*std::prev(MachineBasicBlock::instr_iterator(Insert));
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(NewReg);
  ImposeStackOrdering(Clone);

  DEBUG(dbgs() << " - Cloned to "; Clone->dump());

  // Shrink the interval.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    ShrinkToUses(LI, LIS);
    IsDead = !LI.liveAt(LIS.getInstructionIndex(*Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  if (IsDead) {
    DEBUG(dbgs() << " - Deleting original\n");
    SlotIndex Idx = LIS.getInstructionIndex(*Def).getRegSlot();
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(*Def);
    Def->eraseFromParent();
  }

  return Clone;
}

/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee_local to satisfy the rest of the uses. As an illustration,
/// rewrite this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, Reg = TEE_LOCAL_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a get_local from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());

  // Move Def into place.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  // Create the Tee and attach the registers.
  const auto *RegClass = MRI.getRegClass(Reg);
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
  MachineOperand &DefMO = Def->getOperand(0);
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
                          .addReg(Reg, RegState::Define)
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
  Op.setReg(TeeReg);
  DefMO.setReg(DefReg);
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();

  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
  LiveInterval &LI = LIS.getInterval(Reg);
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
  I->start = TeeIdx;
  ValNo->def = TeeIdx;
  ShrinkToUses(LI, LIS);

  // Finish stackifying the new regs.
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  MFI.stackifyVReg(DefReg);
  MFI.stackifyVReg(TeeReg);
  ImposeStackOrdering(Def);
  ImposeStackOrdering(Tee);

  DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
  return Def;
}

namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
class TreeWalkerState {
  typedef MachineInstr::mop_iterator mop_iterator;
  typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
  typedef iterator_range<mop_reverse_iterator> RangeTy;
  SmallVector<RangeTy, 4> Worklist;

public:
  explicit TreeWalkerState(MachineInstr *Insert) {
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  bool Done() const { return Worklist.empty(); }

  MachineOperand &Pop() {
    RangeTy &Range = Worklist.back();
    MachineOperand &Op = *Range.begin();
    Range = drop_begin(Range, 1);
    if (Range.begin() == Range.end())
      Worklist.pop_back();
    assert((Worklist.empty() ||
            Worklist.back().begin() != Worklist.back().end()) &&
           "Empty ranges shouldn't remain in the worklist");
    return Op;
  }

  /// Push Instr's operands onto the stack to be visited.
  void PushOperands(MachineInstr *Instr) {
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Some of Instr's operands are on the top of the stack; remove them and
  /// re-insert them starting from the beginning (because we've commuted them).
  void ResetTopOperands(MachineInstr *Instr) {
    assert(HasRemainingOperands(Instr) &&
           "Resetting operands should only be done when the instruction has "
           "an operand still on the stack");
    Worklist.back() = reverse(Instr->explicit_uses());
  }

  /// Test whether Instr has operands remaining to be visited at the top of
  /// the stack.
  bool HasRemainingOperands(const MachineInstr *Instr) const {
    if (Worklist.empty())
      return false;
    const RangeTy &Range = Worklist.back();
    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
  }

  /// Test whether the given register is present on the stack, indicating an
  /// operand in the tree that we haven't visited yet. Moving a definition of
  /// Reg to a point in the tree after that would change its value.
  bool IsOnStack(unsigned Reg) const {
    for (const RangeTy &Range : Worklist)
      for (const MachineOperand &MO : Range)
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
    return false;
  }
};

/// State to keep track of whether commuting is in flight or whether it's been
/// tried for the current instruction and didn't work.
class CommutingState {
  /// There are effectively three states: the initial state where we haven't
  /// started commuting anything and we don't know anything yet, the tentative
  /// state where we've commuted the operands of the current instruction and are
  /// revisiting it, and the declined state where we've reverted the operands
  /// back to their original order and will no longer commute it further.
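  ///
  /// Schematically:
  ///
  ///   initial --MaybeCommute--> tentative --MaybeCommute--> declined
  ///
  /// with a successful stackification (Reset) returning any state to initial.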
  bool TentativelyCommuting;
  bool Declined;

  /// During the tentative state, these hold the operand indices of the commuted
  /// operands.
  unsigned Operand0, Operand1;

public:
  CommutingState() : TentativelyCommuting(false), Declined(false) {}

  /// Stackification for an operand was not successful due to ordering
  /// constraints. If possible, and if we haven't already tried it and declined
  /// it, commute Insert's operands and prepare to revisit it.
  void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                    const WebAssemblyInstrInfo *TII) {
    if (TentativelyCommuting) {
      assert(!Declined &&
             "Don't decline commuting until you've finished trying it");
      // Commuting didn't help. Revert it.
      TII->commuteInstruction(Insert, /*NewMI=*/false, Operand0, Operand1);
      TentativelyCommuting = false;
      Declined = true;
    } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (TII->findCommutedOpIndices(Insert, Operand0, Operand1)) {
        // Tentatively commute the operands and try again.
        TII->commuteInstruction(Insert, /*NewMI=*/false, Operand0, Operand1);
        TreeWalker.ResetTopOperands(Insert);
        TentativelyCommuting = true;
        Declined = false;
      }
    }
  }

  /// Stackification for some operand was successful. Reset to the default
  /// state.
  void Reset() {
    TentativelyCommuting = false;
    Declined = false;
  }
};
} // end anonymous namespace

bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        continue;

      // Ignore debugging intrinsics.
      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.Done()) {
        MachineOperand &Op = TreeWalker.Pop();

        // We're only interested in explicit virtual register operands.
        if (!Op.isReg())
          continue;

        unsigned Reg = Op.getReg();
        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Op.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;

        // Identify the definition for this register at this point. Most
        // registers are in SSA form here so we try a quick MRI query first.
        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
        if (!Def)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
          continue;

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee_local.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = Def->getParent() == &MBB;
        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
                       !TreeWalker.IsOnStack(Reg);
        if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
        } else if (ShouldRematerialize(Def, AA, TII)) {
          Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII, TRI);
        } else if (CanMove &&
                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.MaybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.Reset();
        TreeWalker.PushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        ImposeStackOrdering(&*MII);
        MII = std::prev(
            llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
        Changed = true;
      }
    }
  }

  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::EXPR_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();

        if (MFI.isVRegStackified(Reg)) {
          if (MO.isDef())
            Stack.push_back(Reg);
          else
            assert(Stack.pop_back_val() == Reg &&
                   "Register stack pop should be paired with a push");
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}