1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Computations in WWM can overwrite values in inactive channels for
12 /// variables that the register allocator thinks are dead. This pass adds fake
13 /// uses of those variables to their def(s) to make sure that they aren't
14 /// overwritten.
15 ///
16 /// As an example, consider this snippet:
17 /// %vgpr0 = V_MOV_B32_e32 0.0
18 /// if (...) {
19 ///   %vgpr1 = ...
20 ///   %vgpr2 = WWM killed %vgpr1
21 ///   ... = killed %vgpr2
22 ///   %vgpr0 = V_MOV_B32_e32 1.0
23 /// }
24 /// ... = %vgpr0
25 ///
26 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
27 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
28 /// writing %vgpr1 would only write to channels that would be clobbered by the
29 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
30 /// it would clobber even the inactive channels for which the if-condition is
31 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
32 /// of %vgpr0 to its def to make sure they aren't allocated to the
33 /// same register.
34 ///
35 /// In general, we need to figure out what registers might have their inactive
36 /// channels which are eventually used accidentally clobbered by a WWM
37 /// instruction. We do that by spotting three separate cases of registers:
38 ///
39 /// 1. A "then phi": the value resulting from phi elimination of a phi node at
40 ///    the end of an if..endif. If there is WWM code in the "then", then we
41 ///    make the def at the end of the "then" branch a partial def by adding an
42 ///    implicit use of the register.
43 ///
44 /// 2. A "loop exit register": a value written inside a loop but used outside the
45 ///    loop, where there is WWM code inside the loop (the case in the example
46 ///    above). We add an implicit_def of the register in the loop pre-header,
47 ///    and make the original def a partial def by adding an implicit use of the
48 ///    register.
49 ///
50 /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
51 ///    in a loop header. If there is WWM code inside the loop, then we make all
52 ///    defs inside the loop partial defs by adding an implicit use of the
53 ///    register on each one.
54 ///
55 /// Note that we do not need to consider an if..else..endif phi. We only need to
56 /// consider non-uniform control flow, and control flow structurization would
57 /// have transformed a non-uniform if..else..endif into two if..endifs.
58 ///
59 /// The analysis to detect these cases relies on a property of the MIR
60 /// arising from this pass running straight after PHIElimination and before any
61 /// coalescing: that any virtual register with more than one definition must be
62 /// the new register added to lower a phi node by PHIElimination.
63 ///
64 /// FIXME: We should detect whether a register in one of the above categories is
65 /// already live at the WWM code before deciding to add the implicit uses to
66 /// synthesize its liveness.
67 ///
68 /// FIXME: I believe this whole scheme may be flawed due to the possibility of
69 /// the register allocator doing live interval splitting.
70 ///
71 //===----------------------------------------------------------------------===//
72 
73 #include "AMDGPU.h"
74 #include "AMDGPUSubtarget.h"
75 #include "SIInstrInfo.h"
76 #include "SIRegisterInfo.h"
77 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
78 #include "llvm/ADT/DepthFirstIterator.h"
79 #include "llvm/ADT/SparseBitVector.h"
80 #include "llvm/CodeGen/LiveIntervals.h"
81 #include "llvm/CodeGen/MachineDominators.h"
82 #include "llvm/CodeGen/MachineFunctionPass.h"
83 #include "llvm/CodeGen/MachineLoopInfo.h"
84 #include "llvm/CodeGen/Passes.h"
85 #include "llvm/CodeGen/TargetRegisterInfo.h"
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "si-fix-wwm-liveness"
90 
91 namespace {
92 
93 class SIFixWWMLiveness : public MachineFunctionPass {
94 private:
95   MachineDominatorTree *DomTree;
96   MachineLoopInfo *LoopInfo;
97   LiveIntervals *LIS = nullptr;
98   const SIInstrInfo *TII;
99   const SIRegisterInfo *TRI;
100   MachineRegisterInfo *MRI;
101 
102   std::vector<MachineInstr *> WWMs;
103   std::vector<MachineOperand *> ThenDefs;
104   std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
105   std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
106 
107 public:
108   static char ID;
109 
SIFixWWMLiveness()110   SIFixWWMLiveness() : MachineFunctionPass(ID) {
111     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
112   }
113 
114   bool runOnMachineFunction(MachineFunction &MF) override;
115 
getPassName() const116   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
117 
getAnalysisUsage(AnalysisUsage & AU) const118   void getAnalysisUsage(AnalysisUsage &AU) const override {
119     AU.addRequiredID(MachineDominatorsID);
120     AU.addRequiredID(MachineLoopInfoID);
121     // Should preserve the same set that TwoAddressInstructions does.
122     AU.addPreserved<SlotIndexes>();
123     AU.addPreserved<LiveIntervals>();
124     AU.addPreservedID(LiveVariablesID);
125     AU.addPreservedID(MachineLoopInfoID);
126     AU.addPreservedID(MachineDominatorsID);
127     AU.setPreservesCFG();
128     MachineFunctionPass::getAnalysisUsage(AU);
129   }
130 
131 private:
132   void processDef(MachineOperand &DefOpnd);
133   bool processThenDef(MachineOperand *DefOpnd);
134   bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
135   bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
136 };
137 
138 } // End anonymous namespace.
139 
140 INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
141                 "SI fix WWM liveness", false, false)
142 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
143 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
144 INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
145                 "SI fix WWM liveness", false, false)
146 
147 char SIFixWWMLiveness::ID = 0;
148 
149 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
150 
createSIFixWWMLivenessPass()151 FunctionPass *llvm::createSIFixWWMLivenessPass() {
152   return new SIFixWWMLiveness();
153 }
154 
runOnMachineFunction(MachineFunction & MF)155 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
156   LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
157   bool Modified = false;
158 
159   // This doesn't actually need LiveIntervals, but we can preserve them.
160   LIS = getAnalysisIfAvailable<LiveIntervals>();
161 
162   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
163 
164   TII = ST.getInstrInfo();
165   TRI = &TII->getRegisterInfo();
166   MRI = &MF.getRegInfo();
167 
168   DomTree = &getAnalysis<MachineDominatorTree>();
169   LoopInfo = &getAnalysis<MachineLoopInfo>();
170 
171   // Scan the function to find the WWM sections and the candidate registers for
172   // having liveness modified.
173   for (MachineBasicBlock &MBB : MF) {
174     for (MachineInstr &MI : MBB) {
175       if (MI.getOpcode() == AMDGPU::EXIT_WWM)
176         WWMs.push_back(&MI);
177       else {
178         for (MachineOperand &DefOpnd : MI.defs()) {
179           if (DefOpnd.isReg()) {
180             unsigned Reg = DefOpnd.getReg();
181             if (TRI->isVGPR(*MRI, Reg))
182               processDef(DefOpnd);
183           }
184         }
185       }
186     }
187   }
188   if (!WWMs.empty()) {
189     // Synthesize liveness over WWM sections as required.
190     for (auto ThenDef : ThenDefs)
191       Modified |= processThenDef(ThenDef);
192     for (auto LoopExitDef : LoopExitDefs)
193       Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
194     for (auto LoopPhiDef : LoopPhiDefs)
195       Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
196   }
197 
198   WWMs.clear();
199   ThenDefs.clear();
200   LoopExitDefs.clear();
201   LoopPhiDefs.clear();
202 
203   return Modified;
204 }
205 
206 // During the function scan, process an operand that defines a VGPR.
207 // This categorizes the register and puts it in the appropriate list for later
208 // use when processing a WWM section.
processDef(MachineOperand & DefOpnd)209 void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
210   unsigned Reg = DefOpnd.getReg();
211   // Get all the defining instructions. For convenience, make Defs[0] the def
212   // we are on now.
213   SmallVector<const MachineInstr *, 4> Defs;
214   Defs.push_back(DefOpnd.getParent());
215   for (auto &MI : MRI->def_instructions(Reg)) {
216     if (&MI != DefOpnd.getParent())
217       Defs.push_back(&MI);
218   }
219   // Check whether this def dominates all the others. If not, ignore this def.
220   // Either it is going to be processed when the scan encounters its other def
221   // that dominates all defs, or there is no def that dominates all others.
222   // The latter case is an eliminated phi from an if..else..endif or similar,
223   // which must be for uniform control flow so can be ignored.
224   // Because this pass runs shortly after PHIElimination, we assume that any
225   // multi-def register is a lowered phi, and thus has each def in a separate
226   // basic block.
227   for (unsigned I = 1; I != Defs.size(); ++I) {
228     if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
229       return;
230   }
231   // Check for the case of an if..endif lowered phi: It has two defs, one
232   // dominates the other, and there is a single use in a successor of the
233   // dominant def.
234   // Later we will spot any WWM code inside
235   // the "then" clause and turn the second def into a partial def so its
236   // liveness goes through the WWM code in the "then" clause.
237   if (Defs.size() == 2) {
238     auto DomDefBlock = Defs[0]->getParent();
239     if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
240       auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
241       for (auto Succ : DomDefBlock->successors()) {
242         if (Succ == UseBlock) {
243           LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
244           ThenDefs.push_back(&DefOpnd);
245           return;
246         }
247       }
248     }
249   }
250   // Check for the case of a non-lowered-phi register (single def) that exits
251   // a loop, that is, it has a use that is outside a loop that the def is
252   // inside. We find the outermost loop that the def is inside but a use is
253   // outside. Later we will spot any WWM code inside that loop and then make
254   // the def a partial def so its liveness goes round the loop and through the
255   // WWM code.
256   if (Defs.size() == 1) {
257     auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
258     if (!Loop)
259       return;
260     bool IsLoopExit = false;
261     for (auto &Use : MRI->use_instructions(Reg)) {
262       auto UseBlock = Use.getParent();
263       if (Loop->contains(UseBlock))
264         continue;
265       IsLoopExit = true;
266       while (auto Parent = Loop->getParentLoop()) {
267         if (Parent->contains(UseBlock))
268           break;
269         Loop = Parent;
270       }
271     }
272     if (!IsLoopExit)
273       return;
274     LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
275         << " is a loop exit reg with loop header at "
276         << "bb." << Loop->getHeader()->getNumber() << "\n");
277     LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
278             &DefOpnd, Loop));
279     return;
280   }
281   // Check for the case of a lowered single-preheader-loop phi, that is, a
282   // multi-def register where the dominating def is in the loop pre-header and
283   // all other defs are in backedges. Later we will spot any WWM code inside
284   // that loop and then make the backedge defs partial defs so the liveness
285   // goes through the WWM code.
286   // Note that we are ignoring multi-preheader loops on the basis that the
287   // structurizer does not allow that for non-uniform loops.
288   // There must be a single use in the loop header.
289   if (!MRI->hasOneUse(Reg))
290     return;
291   auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
292   auto Loop = LoopInfo->getLoopFor(UseBlock);
293   if (!Loop || Loop->getHeader() != UseBlock
294       || Loop->contains(Defs[0]->getParent())) {
295     LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
296         << " is multi-def but single use not in loop header\n");
297     return;
298   }
299   for (unsigned I = 1; I != Defs.size(); ++I) {
300     if (!Loop->contains(Defs[I]->getParent()))
301       return;
302   }
303   LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
304       << " is a loop phi reg with loop header at "
305       << "bb." << Loop->getHeader()->getNumber() << "\n");
306   LoopPhiDefs.push_back(
307       std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
308 }
309 
310 // Process a then phi def: It has two defs, one dominates the other, and there
311 // is a single use in a successor of the dominant def. Here we spot any WWM
312 // code inside the "then" clause and turn the second def into a partial def so
313 // its liveness goes through the WWM code in the "then" clause.
processThenDef(MachineOperand * DefOpnd)314 bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
315   LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
316   if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
317     // Ignore if dominating def is undef.
318     LLVM_DEBUG(dbgs() << "  ignoring as dominating def is undef\n");
319     return false;
320   }
321   unsigned Reg = DefOpnd->getReg();
322   // Get the use block, which is the endif block.
323   auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
324   // Check whether there is WWM code inside the then branch. The WWM code must
325   // be dominated by the if but not dominated by the endif.
326   bool ContainsWWM = false;
327   for (auto WWM : WWMs) {
328     if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
329         && !DomTree->dominates(UseBlock, WWM->getParent())) {
330       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
331       ContainsWWM = true;
332       break;
333     }
334   }
335   if (!ContainsWWM)
336     return false;
337   // Get the other def.
338   MachineInstr *OtherDef = nullptr;
339   for (auto &MI : MRI->def_instructions(Reg)) {
340     if (&MI != DefOpnd->getParent())
341       OtherDef = &MI;
342   }
343   // Make it a partial def.
344   OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
345   LLVM_DEBUG(dbgs() << *OtherDef);
346   return true;
347 }
348 
349 // Process a loop exit def, that is, a register with a single use in a loop
350 // that has a use outside the loop.  Here we spot any WWM code inside that loop
351 // and then make the def a partial def so its liveness goes round the loop and
352 // through the WWM code.
processLoopExitDef(MachineOperand * DefOpnd,MachineLoop * Loop)353 bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
354       MachineLoop *Loop) {
355   LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
356   // Check whether there is WWM code inside the loop.
357   bool ContainsWWM = false;
358   for (auto WWM : WWMs) {
359     if (Loop->contains(WWM->getParent())) {
360       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
361       ContainsWWM = true;
362       break;
363     }
364   }
365   if (!ContainsWWM)
366     return false;
367   unsigned Reg = DefOpnd->getReg();
368   // Add a new implicit_def in loop preheader(s).
369   for (auto Pred : Loop->getHeader()->predecessors()) {
370     if (!Loop->contains(Pred)) {
371       auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
372           TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
373       LLVM_DEBUG(dbgs() << *ImplicitDef);
374       (void)ImplicitDef;
375     }
376   }
377   // Make the original def partial.
378   DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
379           Reg, false, /*isImp=*/true));
380   LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
381   return true;
382 }
383 
384 // Process a loop phi def, that is, a multi-def register where the dominating
385 // def is in the loop pre-header and all other defs are in backedges. Here we
386 // spot any WWM code inside that loop and then make the backedge defs partial
387 // defs so the liveness goes through the WWM code.
processLoopPhiDef(MachineOperand * DefOpnd,MachineLoop * Loop)388 bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
389       MachineLoop *Loop) {
390   LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
391   // Check whether there is WWM code inside the loop.
392   bool ContainsWWM = false;
393   for (auto WWM : WWMs) {
394     if (Loop->contains(WWM->getParent())) {
395       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
396       ContainsWWM = true;
397       break;
398     }
399   }
400   if (!ContainsWWM)
401     return false;
402   unsigned Reg = DefOpnd->getReg();
403   // Remove kill mark from uses.
404   for (auto &Use : MRI->use_operands(Reg))
405     Use.setIsKill(false);
406   // Make all defs except the dominating one partial defs.
407   SmallVector<MachineInstr *, 4> Defs;
408   for (auto &Def : MRI->def_instructions(Reg))
409     Defs.push_back(&Def);
410   for (auto Def : Defs) {
411     if (DefOpnd->getParent() == Def)
412       continue;
413     Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
414     LLVM_DEBUG(dbgs() << *Def);
415   }
416   return true;
417 }
418 
419