1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements a register stacking pass.
12 ///
13 /// This pass reorders instructions to put register uses and defs in an order
14 /// such that they form single-use expression trees. Registers fitting this form
15 /// are then marked as "stackified", meaning references to them are replaced by
16 /// "push" and "pop" from the value stack.
17 ///
18 /// This is primarily a code size optimization, since temporary values on the
19 /// value stack don't need to be named.
20 ///
21 //===----------------------------------------------------------------------===//
22 
23 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
24 #include "WebAssembly.h"
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "WebAssemblyUtilities.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32 #include "llvm/CodeGen/MachineDominators.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "wasm-reg-stackify"
42 
namespace {
/// The register stackification pass. Reorders instructions and marks
/// single-use virtual registers as "stackified" so later stages can emit
/// them as implicit value-stack pushes and pops.
class WebAssemblyRegStackify final : public MachineFunctionPass {
  StringRef getPassName() const override {
    return "WebAssembly Register Stackify";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Instructions are only reordered within basic blocks; the CFG is
    // never changed.
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace
69 
char WebAssemblyRegStackify::ID = 0;
// Register the pass with LLVM's pass registry under DEBUG_TYPE.
INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
                "Reorder instructions to use the WebAssembly value stack",
                false, false)
74 
/// Factory entry point used by the WebAssembly target to add this pass to
/// its codegen pipeline. The caller takes ownership of the returned pass.
FunctionPass *llvm::createWebAssemblyRegStackify() {
  return new WebAssemblyRegStackify();
}
78 
79 // Decorate the given instruction with implicit operands that enforce the
80 // expression stack ordering constraints for an instruction which is on
81 // the expression stack.
82 static void ImposeStackOrdering(MachineInstr *MI) {
83   // Write the opaque VALUE_STACK register.
84   if (!MI->definesRegister(WebAssembly::VALUE_STACK))
85     MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
86                                              /*isDef=*/true,
87                                              /*isImp=*/true));
88 
89   // Also read the opaque VALUE_STACK register.
90   if (!MI->readsRegister(WebAssembly::VALUE_STACK))
91     MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
92                                              /*isDef=*/false,
93                                              /*isImp=*/true));
94 }
95 
96 // Convert an IMPLICIT_DEF instruction into an instruction which defines
97 // a constant zero value.
98 static void ConvertImplicitDefToConstZero(MachineInstr *MI,
99                                           MachineRegisterInfo &MRI,
100                                           const TargetInstrInfo *TII,
101                                           MachineFunction &MF) {
102   assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
103 
104   const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
105   if (RegClass == &WebAssembly::I32RegClass) {
106     MI->setDesc(TII->get(WebAssembly::CONST_I32));
107     MI->addOperand(MachineOperand::CreateImm(0));
108   } else if (RegClass == &WebAssembly::I64RegClass) {
109     MI->setDesc(TII->get(WebAssembly::CONST_I64));
110     MI->addOperand(MachineOperand::CreateImm(0));
111   } else if (RegClass == &WebAssembly::F32RegClass) {
112     MI->setDesc(TII->get(WebAssembly::CONST_F32));
113     ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
114         Type::getFloatTy(MF.getFunction().getContext())));
115     MI->addOperand(MachineOperand::CreateFPImm(Val));
116   } else if (RegClass == &WebAssembly::F64RegClass) {
117     MI->setDesc(TII->get(WebAssembly::CONST_F64));
118     ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
119         Type::getDoubleTy(MF.getFunction().getContext())));
120     MI->addOperand(MachineOperand::CreateFPImm(Val));
121   } else if (RegClass == &WebAssembly::V128RegClass) {
122     // TODO: make splat instead of constant
123     MI->setDesc(TII->get(WebAssembly::CONST_V128_v16i8));
124     for (int I = 0; I < 16; ++I)
125       MI->addOperand(MachineOperand::CreateImm(0));
126   } else {
127     llvm_unreachable("Unexpected reg class");
128   }
129 }
130 
131 // Determine whether a call to the callee referenced by
132 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
133 // effects.
134 static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
135                         bool &Write, bool &Effects, bool &StackPointer) {
136   // All calls can use the stack pointer.
137   StackPointer = true;
138 
139   const MachineOperand &MO = MI.getOperand(CalleeOpNo);
140   if (MO.isGlobal()) {
141     const Constant *GV = MO.getGlobal();
142     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
143       if (!GA->isInterposable())
144         GV = GA->getAliasee();
145 
146     if (const Function *F = dyn_cast<Function>(GV)) {
147       if (!F->doesNotThrow())
148         Effects = true;
149       if (F->doesNotAccessMemory())
150         return;
151       if (F->onlyReadsMemory()) {
152         Read = true;
153         return;
154       }
155     }
156   }
157 
158   // Assume the worst.
159   Write = true;
160   Read = true;
161   Effects = true;
162 }
163 
164 // Determine whether MI reads memory, writes memory, has side effects,
165 // and/or uses the stack pointer value.
166 static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
167                   bool &Write, bool &Effects, bool &StackPointer) {
168   assert(!MI.isTerminator());
169 
170   if (MI.isDebugInstr() || MI.isPosition())
171     return;
172 
173   // Check for loads.
174   if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
175     Read = true;
176 
177   // Check for stores.
178   if (MI.mayStore()) {
179     Write = true;
180 
181     // Check for stores to __stack_pointer.
182     for (auto MMO : MI.memoperands()) {
183       const MachinePointerInfo &MPI = MMO->getPointerInfo();
184       if (MPI.V.is<const PseudoSourceValue *>()) {
185         auto PSV = MPI.V.get<const PseudoSourceValue *>();
186         if (const ExternalSymbolPseudoSourceValue *EPSV =
187                 dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
188           if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
189             StackPointer = true;
190           }
191       }
192     }
193   } else if (MI.hasOrderedMemoryRef()) {
194     switch (MI.getOpcode()) {
195     case WebAssembly::DIV_S_I32:
196     case WebAssembly::DIV_S_I64:
197     case WebAssembly::REM_S_I32:
198     case WebAssembly::REM_S_I64:
199     case WebAssembly::DIV_U_I32:
200     case WebAssembly::DIV_U_I64:
201     case WebAssembly::REM_U_I32:
202     case WebAssembly::REM_U_I64:
203     case WebAssembly::I32_TRUNC_S_F32:
204     case WebAssembly::I64_TRUNC_S_F32:
205     case WebAssembly::I32_TRUNC_S_F64:
206     case WebAssembly::I64_TRUNC_S_F64:
207     case WebAssembly::I32_TRUNC_U_F32:
208     case WebAssembly::I64_TRUNC_U_F32:
209     case WebAssembly::I32_TRUNC_U_F64:
210     case WebAssembly::I64_TRUNC_U_F64:
211       // These instruction have hasUnmodeledSideEffects() returning true
212       // because they trap on overflow and invalid so they can't be arbitrarily
213       // moved, however hasOrderedMemoryRef() interprets this plus their lack
214       // of memoperands as having a potential unknown memory reference.
215       break;
216     default:
217       // Record volatile accesses, unless it's a call, as calls are handled
218       // specially below.
219       if (!MI.isCall()) {
220         Write = true;
221         Effects = true;
222       }
223       break;
224     }
225   }
226 
227   // Check for side effects.
228   if (MI.hasUnmodeledSideEffects()) {
229     switch (MI.getOpcode()) {
230     case WebAssembly::DIV_S_I32:
231     case WebAssembly::DIV_S_I64:
232     case WebAssembly::REM_S_I32:
233     case WebAssembly::REM_S_I64:
234     case WebAssembly::DIV_U_I32:
235     case WebAssembly::DIV_U_I64:
236     case WebAssembly::REM_U_I32:
237     case WebAssembly::REM_U_I64:
238     case WebAssembly::I32_TRUNC_S_F32:
239     case WebAssembly::I64_TRUNC_S_F32:
240     case WebAssembly::I32_TRUNC_S_F64:
241     case WebAssembly::I64_TRUNC_S_F64:
242     case WebAssembly::I32_TRUNC_U_F32:
243     case WebAssembly::I64_TRUNC_U_F32:
244     case WebAssembly::I32_TRUNC_U_F64:
245     case WebAssembly::I64_TRUNC_U_F64:
246       // These instructions have hasUnmodeledSideEffects() returning true
247       // because they trap on overflow and invalid so they can't be arbitrarily
248       // moved, however in the specific case of register stackifying, it is safe
249       // to move them because overflow and invalid are Undefined Behavior.
250       break;
251     default:
252       Effects = true;
253       break;
254     }
255   }
256 
257   // Analyze calls.
258   if (MI.isCall()) {
259     unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
260     QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
261   }
262 }
263 
// Test whether Def is safe and profitable to rematerialize.
// Profitable: it's no more expensive than a move. Safe: the target reports
// it as trivially rematerializable under the given alias analysis.
static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
                                const WebAssemblyInstrInfo *TII) {
  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
}
269 
270 // Identify the definition for this register at this point. This is a
271 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
272 // LiveIntervals to handle complex cases.
273 static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
274                                 const MachineRegisterInfo &MRI,
275                                 const LiveIntervals &LIS) {
276   // Most registers are in SSA form here so we try a quick MRI query first.
277   if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
278     return Def;
279 
280   // MRI doesn't know what the Def is. Try asking LIS.
281   if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
282           LIS.getInstructionIndex(*Insert)))
283     return LIS.getInstructionFromIndex(ValNo->def);
284 
285   return nullptr;
286 }
287 
288 // Test whether Reg, as defined at Def, has exactly one use. This is a
289 // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
290 // to handle complex cases.
291 static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
292                       MachineDominatorTree &MDT, LiveIntervals &LIS) {
293   // Most registers are in SSA form here so we try a quick MRI query first.
294   if (MRI.hasOneUse(Reg))
295     return true;
296 
297   bool HasOne = false;
298   const LiveInterval &LI = LIS.getInterval(Reg);
299   const VNInfo *DefVNI =
300       LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
301   assert(DefVNI);
302   for (auto &I : MRI.use_nodbg_operands(Reg)) {
303     const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
304     if (Result.valueIn() == DefVNI) {
305       if (!Result.isKill())
306         return false;
307       if (HasOne)
308         return false;
309       HasOne = true;
310     }
311   }
312   return HasOne;
313 }
314 
// Test whether it's safe to move Def to just before Insert.
// Both instructions must be in the same basic block; the check covers
// register dependencies and, via Query, memory/effect/stack-pointer ordering
// against every instruction strictly between Def and Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
  assert(Def->getParent() == Insert->getParent());

  // Check for register dependencies.
  SmallVector<unsigned, 4> MutableRegisters;
  for (const MachineOperand &MO : Def->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    unsigned Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg) &&
        !Insert->readsRegister(Reg))
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // If one of the operands isn't in SSA form, it has different values at
    // different times, and we need to make sure we don't move our use across
    // a different def.
    if (!MO.isDef() && !MRI.hasOneDef(Reg))
      MutableRegisters.push_back(Reg);
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  Query(*Def, AA, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  bool HasMutableRegisters = !MutableRegisters.empty();
  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
    return true;

  // Scan through the intervening instructions between Def and Insert.
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
    // Two effectful instructions may not be reordered past each other.
    if (Effects && InterveningEffects)
      return false;
    // A load can't move across a store; a store can't move across any
    // memory access.
    if (Read && InterveningWrite)
      return false;
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    // Stack-pointer accesses must stay ordered relative to each other.
    if (StackPointer && InterveningStackPointer)
      return false;

    // If an intervening instruction redefines a mutable register Def uses,
    // moving Def past it would change the value Def reads.
    for (unsigned Reg : MutableRegisters)
      for (const MachineOperand &MO : I->operands())
        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
          return false;
  }

  return true;
}
390 
391 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
392 static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
393                                      const MachineBasicBlock &MBB,
394                                      const MachineRegisterInfo &MRI,
395                                      const MachineDominatorTree &MDT,
396                                      LiveIntervals &LIS,
397                                      WebAssemblyFunctionInfo &MFI) {
398   const LiveInterval &LI = LIS.getInterval(Reg);
399 
400   const MachineInstr *OneUseInst = OneUse.getParent();
401   VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
402 
403   for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
404     if (&Use == &OneUse)
405       continue;
406 
407     const MachineInstr *UseInst = Use.getParent();
408     VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
409 
410     if (UseVNI != OneUseVNI)
411       continue;
412 
413     const MachineInstr *OneUseInst = OneUse.getParent();
414     if (UseInst == OneUseInst) {
415       // Another use in the same instruction. We need to ensure that the one
416       // selected use happens "before" it.
417       if (&OneUse > &Use)
418         return false;
419     } else {
420       // Test that the use is dominated by the one selected use.
421       while (!MDT.dominates(OneUseInst, UseInst)) {
422         // Actually, dominating is over-conservative. Test that the use would
423         // happen after the one selected use in the stack evaluation order.
424         //
425         // This is needed as a consequence of using implicit get_locals for
426         // uses and implicit set_locals for defs.
427         if (UseInst->getDesc().getNumDefs() == 0)
428           return false;
429         const MachineOperand &MO = UseInst->getOperand(0);
430         if (!MO.isReg())
431           return false;
432         unsigned DefReg = MO.getReg();
433         if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
434             !MFI.isVRegStackified(DefReg))
435           return false;
436         assert(MRI.hasOneNonDBGUse(DefReg));
437         const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
438         const MachineInstr *NewUseInst = NewUse.getParent();
439         if (NewUseInst == OneUseInst) {
440           if (&OneUse > &NewUse)
441             return false;
442           break;
443         }
444         UseInst = NewUseInst;
445       }
446     }
447   }
448   return true;
449 }
450 
451 /// Get the appropriate tee opcode for the given register class.
452 static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
453   if (RC == &WebAssembly::I32RegClass)
454     return WebAssembly::TEE_I32;
455   if (RC == &WebAssembly::I64RegClass)
456     return WebAssembly::TEE_I64;
457   if (RC == &WebAssembly::F32RegClass)
458     return WebAssembly::TEE_F32;
459   if (RC == &WebAssembly::F64RegClass)
460     return WebAssembly::TEE_F64;
461   if (RC == &WebAssembly::V128RegClass)
462     return WebAssembly::TEE_V128;
463   llvm_unreachable("Unexpected register class");
464 }
465 
// Shrink LI to its uses, cleaning up LI.
// If shrinking leaves the interval in disconnected components, split them
// into separate virtual-register intervals.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
  if (LIS.shrinkToUses(&LI)) {
    SmallVector<LiveInterval *, 4> SplitLIs;
    LIS.splitSeparateComponents(LI, SplitLIs);
  }
}
473 
474 static void MoveDebugValues(unsigned Reg, MachineInstr *Insert,
475                             MachineBasicBlock &MBB, MachineRegisterInfo &MRI) {
476   for (auto &Op : MRI.reg_operands(Reg)) {
477     MachineInstr *MI = Op.getParent();
478     assert(MI != nullptr);
479     if (MI->isDebugValue() && MI->getParent() == &MBB)
480       MBB.splice(Insert, &MBB, MI);
481   }
482 }
483 
484 static void UpdateDebugValuesReg(unsigned Reg, unsigned NewReg,
485                                  MachineBasicBlock &MBB,
486                                  MachineRegisterInfo &MRI) {
487   for (auto &Op : MRI.reg_operands(Reg)) {
488     MachineInstr *MI = Op.getParent();
489     assert(MI != nullptr);
490     if (MI->isDebugValue() && MI->getParent() == &MBB)
491       Op.setReg(NewReg);
492   }
493 }
494 
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
/// Returns Def, which becomes the new insertion point for the tree walk.
static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
                                      MachineInstr *Def, MachineBasicBlock &MBB,
                                      MachineInstr *Insert, LiveIntervals &LIS,
                                      WebAssemblyFunctionInfo &MFI,
                                      MachineRegisterInfo &MRI) {
  LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());

  // Move Def just before Insert, bring its DBG_VALUEs along, and inform
  // LiveIntervals of the move.
  MBB.splice(Insert, &MBB, Def);
  MoveDebugValues(Reg, Insert, MBB, MRI);
  LIS.handleMove(*Def);

  if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
    // No one else is using this register for anything so we can just stackify
    // it in place.
    MFI.stackifyVReg(Reg);
  } else {
    // The register may have unrelated uses or defs; create a new register for
    // just our one def and use so that we can stackify it.
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
    Def->getOperand(0).setReg(NewReg);
    Op.setReg(NewReg);

    // Tell LiveIntervals about the new register.
    LIS.createAndComputeVirtRegInterval(NewReg);

    // Tell LiveIntervals about the changes to the old register: the segment
    // from our def to our use now belongs to NewReg, so remove it from Reg.
    LiveInterval &LI = LIS.getInterval(Reg);
    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
                     /*RemoveDeadValNo=*/true);

    MFI.stackifyVReg(NewReg);

    // Retarget DBG_VALUEs in this block from the old register to NewReg.
    UpdateDebugValuesReg(Reg, NewReg, MBB, MRI);

    LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  }

  ImposeStackOrdering(Def);
  return Def;
}
538 
539 static void CloneDebugValues(unsigned Reg, MachineInstr *Insert,
540                              unsigned TargetReg, MachineBasicBlock &MBB,
541                              MachineRegisterInfo &MRI,
542                              const WebAssemblyInstrInfo *TII) {
543   SmallPtrSet<MachineInstr *, 4> Instrs;
544   for (auto &Op : MRI.reg_operands(Reg)) {
545     MachineInstr *MI = Op.getParent();
546     assert(MI != nullptr);
547     if (MI->isDebugValue() && MI->getParent() == &MBB &&
548         Instrs.find(MI) == Instrs.end())
549       Instrs.insert(MI);
550   }
551   for (const auto &MI : Instrs) {
552     MachineInstr &Clone = TII->duplicate(MBB, Insert, *MI);
553     for (unsigned i = 0, e = Clone.getNumOperands(); i != e; ++i) {
554       MachineOperand &MO = Clone.getOperand(i);
555       if (MO.isReg() && MO.getReg() == Reg)
556         MO.setReg(TargetReg);
557     }
558     LLVM_DEBUG(dbgs() << " - - Cloned DBG_VALUE: "; Clone.dump());
559   }
560 }
561 
/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction. Returns the clone, which becomes the new insertion
/// point for the tree walk. The original def is deleted if the clone made it
/// dead.
static MachineInstr *RematerializeCheapDef(
    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
  LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
  LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

  // Clone Def just before Insert, defining a fresh register, retarget the
  // use at it, and stackify the new register.
  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
  Op.setReg(NewReg);
  MachineInstr *Clone = &*std::prev(Insert);
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(NewReg);
  ImposeStackOrdering(Clone);

  LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());

  // Shrink the interval.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    ShrinkToUses(LI, LIS);
    // Even if Reg has other uses, the value defined at Def itself may now
    // be dead.
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
  if (IsDead) {
    LLVM_DEBUG(dbgs() << " - Deleting original\n");
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
    // Drop the ARGUMENTS phys-reg liveness recorded at the deleted def's
    // slot so LiveIntervals stays consistent.
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(Def);
    Def.eraseFromParent();

    MoveDebugValues(Reg, &*Insert, MBB, MRI);
    UpdateDebugValuesReg(Reg, NewReg, MBB, MRI);
  } else {
    // The original def survives, so duplicate its DBG_VALUEs for the clone.
    CloneDebugValues(Reg, &*Insert, NewReg, MBB, MRI, TII);
  }

  return Clone;
}
609 
/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
/// this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, Reg = TEE_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a get_local from the
/// resulting code. Returns Def, which becomes the new insertion point for
/// the tree walk.
static MachineInstr *MoveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());

  // Move Def into place.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  // Create the Tee and attach the registers. TeeReg feeds Insert; Reg keeps
  // serving the remaining uses; DefReg connects Def to the tee.
  const auto *RegClass = MRI.getRegClass(Reg);
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
  MachineOperand &DefMO = Def->getOperand(0);
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(GetTeeOpcode(RegClass)), TeeReg)
                          .addReg(Reg, RegState::Define)
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
  Op.setReg(TeeReg);
  DefMO.setReg(DefReg);
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();

  MoveDebugValues(Reg, Insert, MBB, MRI);

  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
  LiveInterval &LI = LIS.getInterval(Reg);
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
  I->start = TeeIdx;
  ValNo->def = TeeIdx;
  ShrinkToUses(LI, LIS);

  // Finish stackifying the new regs.
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  MFI.stackifyVReg(DefReg);
  MFI.stackifyVReg(TeeReg);
  ImposeStackOrdering(Def);
  ImposeStackOrdering(Tee);

  // Debug values for the old register are duplicated for both new defs.
  CloneDebugValues(Reg, Tee, DefReg, MBB, MRI, TII);
  CloneDebugValues(Reg, Insert, TeeReg, MBB, MRI, TII);

  LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
  return Def;
}
679 
namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
///
/// Each worklist entry is a reversed range of an instruction's explicit
/// uses, so Pop visits an instruction's operands from last to first.
class TreeWalkerState {
  typedef MachineInstr::mop_iterator mop_iterator;
  typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
  typedef iterator_range<mop_reverse_iterator> RangeTy;
  SmallVector<RangeTy, 4> Worklist;

public:
  explicit TreeWalkerState(MachineInstr *Insert) {
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Test whether every queued operand has been visited.
  bool Done() const { return Worklist.empty(); }

  /// Return the next operand to visit and advance past it, popping any
  /// range that becomes empty.
  MachineOperand &Pop() {
    RangeTy &Range = Worklist.back();
    MachineOperand &Op = *Range.begin();
    Range = drop_begin(Range, 1);
    if (Range.begin() == Range.end())
      Worklist.pop_back();
    assert((Worklist.empty() ||
            Worklist.back().begin() != Worklist.back().end()) &&
           "Empty ranges shouldn't remain in the worklist");
    return Op;
  }

  /// Push Instr's operands onto the stack to be visited.
  void PushOperands(MachineInstr *Instr) {
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Some of Instr's operands are on the top of the stack; remove them and
  /// re-insert them starting from the beginning (because we've commuted them).
  void ResetTopOperands(MachineInstr *Instr) {
    assert(HasRemainingOperands(Instr) &&
           "Reseting operands should only be done when the instruction has "
           "an operand still on the stack");
    Worklist.back() = reverse(Instr->explicit_uses());
  }

  /// Test whether Instr has operands remaining to be visited at the top of
  /// the stack.
  bool HasRemainingOperands(const MachineInstr *Instr) const {
    if (Worklist.empty())
      return false;
    const RangeTy &Range = Worklist.back();
    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
  }

  /// Test whether the given register is present on the stack, indicating an
  /// operand in the tree that we haven't visited yet. Moving a definition of
  /// Reg to a point in the tree after that would change its value.
  ///
  /// This is needed as a consequence of using implicit get_locals for
  /// uses and implicit set_locals for defs.
  bool IsOnStack(unsigned Reg) const {
    for (const RangeTy &Range : Worklist)
      for (const MachineOperand &MO : Range)
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
    return false;
  }
};
749 
/// State to keep track of whether commuting is in flight or whether it's been
/// tried for the current instruction and didn't work.
class CommutingState {
  /// There are effectively three states: the initial state where we haven't
  /// started commuting anything and we don't know anything yet, the tentative
  /// state where we've commuted the operands of the current instruction and
  /// are revisiting it, and the declined state where we've reverted the
  /// operands back to their original order and will no longer commute it
  /// further.
  bool TentativelyCommuting;
  bool Declined;

  /// During the tentative state, these hold the operand indices of the
  /// commuted operands, so the commutation can be reverted if it didn't help.
  unsigned Operand0, Operand1;

public:
  CommutingState() : TentativelyCommuting(false), Declined(false) {}

  /// Stackification for an operand was not successful due to ordering
  /// constraints. If possible, and if we haven't already tried it and declined
  /// it, commute Insert's operands and prepare to revisit it.
  void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                    const WebAssemblyInstrInfo *TII) {
    if (TentativelyCommuting) {
      assert(!Declined &&
             "Don't decline commuting until you've finished trying it");
      // Commuting didn't help. Revert it.
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
      TentativelyCommuting = false;
      Declined = true;
    } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
      // Let the target pick which pair of operands to commute.
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
        // Tentatively commute the operands and try again.
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
        TreeWalker.ResetTopOperands(Insert);
        TentativelyCommuting = true;
        Declined = false;
      }
    }
  }

  /// Stackification for some operand was successful. Reset to the default
  /// state.
  void Reset() {
    TentativelyCommuting = false;
    Declined = false;
  }
};
} // end anonymous namespace
801 
/// Main entry point: walk every basic block bottom-up, folding defs into
/// single-use expression trees by moving, rematerializing, or teeing them,
/// and mark the folded registers as stackified.
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  bool Changed = false;
  // Gather the analyses the stackification decisions below rely on.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        continue;

      // Ignore debugging intrinsics.
      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.Done()) {
        MachineOperand &Op = TreeWalker.Pop();

        // We're only interested in explicit virtual register operands.
        if (!Op.isReg())
          continue;

        unsigned Reg = Op.getReg();
        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Op.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        // Only virtual registers can be stackified; skip physical ones.
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;

        // Identify the definition for this register at this point.
        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
        if (!Def)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (WebAssembly::isArgument(*Def))
          continue;

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = Def->getParent() == &MBB;
        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
                       !TreeWalker.IsOnStack(Reg);
        if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
        } else if (ShouldRematerialize(*Def, AA, TII)) {
          Insert =
              RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
                                    LIS, MFI, MRI, TII, TRI);
        } else if (CanMove &&
                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.MaybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
        // to a constant 0 so that the def is explicit, and the push/pop
        // correspondence is maintained.
        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.Reset();
        TreeWalker.PushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        ImposeStackOrdering(&*MII);
        // Resume the reverse walk at the deepest (earliest-in-block)
        // instruction of the tree we just built, so the loop's ++MII steps
        // past the whole tree rather than revisiting it.
        MII = MachineBasicBlock::iterator(Insert).getReverse();
        Changed = true;
      }
    }
  }

  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::VALUE_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();

        if (MFI.isVRegStackified(Reg)) {
          if (MO.isDef())
            Stack.push_back(Reg);
          else
            assert(Stack.pop_back_val() == Reg &&
                   "Register stack pop should be paired with a push");
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}
954