1 //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live range in divergent if-else
11 /// structure.
12 ///
13 /// When we do structurization, we usually transform a if-else into two
14 /// sucessive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 ///    bb.if:
18 ///      %a = ...
19 ///      ...
20 ///    bb.then:
21 ///      ... = op %a
22 ///      ... // %a can be dead here
23 ///    bb.flow:
24 ///      ...
25 ///    bb.else:
26 ///      ... = %a
27 ///      ...
28 ///    bb.endif
29 ///
30 ///  As register allocator has no idea of the thread-control-flow, it will just
31 ///  assume %a would be alive in the whole range of bb.then because of a later
32 ///  use in bb.else. On AMDGPU architecture, the VGPR was accessed with respect
33 ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34 ///  inactive in bb.else, and vice-verse. So we are safe to say that %a was dead
35 ///  after the last use in bb.then untill the end of the block. The reason is
36 ///  the instructions in bb.then will only overwrite lanes that will never be
37 ///  accessed in bb.else.
38 ///
39 ///  This pass aims to to tell register allocator that %a is in-fact dead,
40 ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41 ///  from bb.then, and then replace the uses in the bb.else with the result of
42 ///  newly inserted phi.
43 ///
44 ///  Two key conditions must be met to ensure correctness:
45 ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46 ///      sure the second loop iteration still get correct data.
47 ///  2.) There should be no further uses after the IF-ELSE region.
48 ///
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "AMDGPU.h"
53 #include "GCNSubtarget.h"
54 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
55 #include "SIMachineFunctionInfo.h"
56 #include "llvm/CodeGen/LiveVariables.h"
57 #include "llvm/CodeGen/MachineDominators.h"
58 #include "llvm/CodeGen/MachineLoopInfo.h"
59 #include "llvm/CodeGen/TargetRegisterInfo.h"
60 #include "llvm/InitializePasses.h"
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "si-opt-vgpr-liverange"
65 
66 namespace {
67 
68 class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
69 private:
70   const SIRegisterInfo *TRI = nullptr;
71   const SIInstrInfo *TII = nullptr;
72   LiveVariables *LV = nullptr;
73   MachineDominatorTree *MDT = nullptr;
74   const MachineLoopInfo *Loops = nullptr;
75   MachineRegisterInfo *MRI = nullptr;
76 
77 public:
78   static char ID;
79 
80   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
81 
82   void collectElseRegionBlocks(MachineBasicBlock *Flow,
83                                MachineBasicBlock *Endif,
84                                SmallSetVector<MachineBasicBlock *, 16> &) const;
85 
86   void
87   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
88                             MachineBasicBlock *Endif,
89                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
90                             SmallVectorImpl<Register> &CandidateRegs) const;
91 
92   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
93                              SmallVectorImpl<MachineInstr *> &Uses) const;
94 
95   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
96                                    MachineBasicBlock *Flow) const;
97 
98   void updateLiveRangeInElseRegion(
99       Register Reg, Register NewReg, MachineBasicBlock *Flow,
100       MachineBasicBlock *Endif,
101       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
102 
103   void
104   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
105                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
106                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
107 
108   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
109 
110   bool runOnMachineFunction(MachineFunction &MF) override;
111 
112   StringRef getPassName() const override {
113     return "SI Optimize VGPR LiveRange";
114   }
115 
116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.addRequired<LiveVariables>();
118     AU.addRequired<MachineDominatorTree>();
119     AU.addRequired<MachineLoopInfo>();
120     AU.addPreserved<LiveVariables>();
121     AU.addPreserved<MachineDominatorTree>();
122     AU.addPreserved<MachineLoopInfo>();
123     MachineFunctionPass::getAnalysisUsage(AU);
124   }
125 
126   MachineFunctionProperties getRequiredProperties() const override {
127     return MachineFunctionProperties().set(
128         MachineFunctionProperties::Property::IsSSA);
129   }
130 };
131 
132 } // end anonymous namespace
133 
134 // Check whether the MBB is a else flow block and get the branching target which
135 // is the Endif block
136 MachineBasicBlock *
137 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
138   for (auto &BR : MBB->terminators()) {
139     if (BR.getOpcode() == AMDGPU::SI_ELSE)
140       return BR.getOperand(2).getMBB();
141   }
142   return nullptr;
143 }
144 
145 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
146     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
147     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
148   assert(Flow != Endif);
149 
150   MachineBasicBlock *MBB = Endif;
151   unsigned Cur = 0;
152   while (MBB) {
153     for (auto *Pred : MBB->predecessors()) {
154       if (Pred != Flow && !Blocks.contains(Pred))
155         Blocks.insert(Pred);
156     }
157 
158     if (Cur < Blocks.size())
159       MBB = Blocks[Cur++];
160     else
161       MBB = nullptr;
162   }
163 
164   LLVM_DEBUG(dbgs() << "Found Else blocks: ");
165   for (auto *MBB : Blocks)
166     LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ' ');
167   LLVM_DEBUG(dbgs() << '\n');
168 }
169 
170 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
171 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
172     Register Reg, MachineBasicBlock *MBB,
173     SmallVectorImpl<MachineInstr *> &Uses) const {
174   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
175     if (UseMI.getParent() == MBB && !UseMI.isPHI())
176       Uses.push_back(&UseMI);
177   }
178 }
179 
180 /// Collect the killed registers in the ELSE region which are not alive through
181 /// the whole THEN region.
182 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
183     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
184     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
185     SmallVectorImpl<Register> &CandidateRegs) const {
186 
187   SmallSet<Register, 8> KillsInElse;
188 
189   for (auto *Else : ElseBlocks) {
190     for (auto &MI : Else->instrs()) {
191       if (MI.isDebugInstr())
192         continue;
193 
194       for (auto &MO : MI.operands()) {
195         if (!MO.isReg() || !MO.getReg() || MO.isDef())
196           continue;
197 
198         Register MOReg = MO.getReg();
199         // We can only optimize AGPR/VGPR virtual register
200         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
201           continue;
202 
203         if (MO.isKill() && MO.readsReg()) {
204           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
205           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
206           // Make sure two conditions are met:
207           // a.) the value is defined before/in the IF block
208           // b.) should be defined in the same loop-level.
209           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
210               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
211             KillsInElse.insert(MOReg);
212         }
213       }
214     }
215   }
216 
217   // Check the phis in the Endif, looking for value coming from the ELSE
218   // region. Make sure the phi-use is the last use.
219   for (auto &MI : Endif->phis()) {
220     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
221       auto &MO = MI.getOperand(Idx);
222       auto *Pred = MI.getOperand(Idx + 1).getMBB();
223       if (Pred == Flow)
224         continue;
225       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
226 
227       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
228         continue;
229 
230       Register Reg = MO.getReg();
231       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
232         continue;
233 
234       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
235 
236       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
237         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
238                           << " as Live in Endif\n");
239         continue;
240       }
241       // Make sure two conditions are met:
242       // a.) the value is defined before/in the IF block
243       // b.) should be defined in the same loop-level.
244       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
245       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
246           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
247         KillsInElse.insert(Reg);
248     }
249   }
250 
251   auto IsLiveThroughThen = [&](Register Reg) {
252     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
253          ++I) {
254       if (!I->readsReg())
255         continue;
256       auto *UseMI = I->getParent();
257       auto *UseMBB = UseMI->getParent();
258       if (UseMBB == Flow || UseMBB == Endif) {
259         if (!UseMI->isPHI())
260           return true;
261 
262         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
263         // The register is live through the path If->Flow or Flow->Endif.
264         // we should not optimize for such cases.
265         if ((UseMBB == Flow && IncomingMBB != If) ||
266             (UseMBB == Endif && IncomingMBB == Flow))
267           return true;
268       }
269     }
270     return false;
271   };
272 
273   for (auto Reg : KillsInElse) {
274     if (!IsLiveThroughThen(Reg))
275       CandidateRegs.push_back(Reg);
276   }
277 }
278 
279 // Re-calculate the liveness of \p Reg in the THEN-region
280 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
281     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
282 
283   SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
284 
285   MachineBasicBlock *ThenEntry = nullptr;
286   for (auto *Succ : If->successors()) {
287     if (Succ != Flow) {
288       ThenEntry = Succ;
289       break;
290     }
291   }
292   assert(ThenEntry && "No successor in Then region?");
293 
294   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
295   df_iterator_default_set<MachineBasicBlock *, 16> Visited;
296 
297   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
298     if (MBB == Flow)
299       break;
300 
301     // Clear Live bit, as we will recalculate afterwards
302     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
303                       << '\n');
304     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
305   }
306 
307   // Get the blocks the Reg should be alive through
308   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
309        ++I) {
310     auto *UseMI = I->getParent();
311     if (UseMI->isPHI() && I->readsReg()) {
312       if (Visited.contains(UseMI->getParent()))
313         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
314     }
315   }
316 
317   Visited.clear();
318 
319   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
320     if (MBB == Flow)
321       break;
322 
323     SmallVector<MachineInstr *> Uses;
324     // PHI instructions has been processed before.
325     findNonPHIUsesInBlock(Reg, MBB, Uses);
326 
327     if (Uses.size() == 1) {
328       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
329                         << printMBBReference(*MBB) << '\n');
330       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
331     } else if (Uses.size() > 1) {
332       // Process the instructions in-order
333       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
334                         << printMBBReference(*MBB) << '\n');
335       for (MachineInstr &MI : *MBB) {
336         if (llvm::is_contained(Uses, &MI))
337           LV->HandleVirtRegUse(Reg, MBB, MI);
338       }
339     }
340 
341     // Mark Reg alive through the block if this is a PHI incoming block
342     if (PHIIncoming.contains(MBB))
343       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
344                                   MBB);
345   }
346 
347   // Set the isKilled flag if we get new Kills in the THEN region.
348   for (auto *MI : OldVarInfo.Kills) {
349     if (Visited.contains(MI->getParent()))
350       MI->addRegisterKilled(Reg, TRI);
351   }
352 }
353 
354 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
355     Register Reg, Register NewReg, MachineBasicBlock *Flow,
356     MachineBasicBlock *Endif,
357     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
358   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
359   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
360 
361   // Transfer aliveBlocks from Reg to NewReg
362   for (auto *MBB : ElseBlocks) {
363     unsigned BBNum = MBB->getNumber();
364     if (OldVarInfo.AliveBlocks.test(BBNum)) {
365       NewVarInfo.AliveBlocks.set(BBNum);
366       LLVM_DEBUG(dbgs() << "Removing ALiveBlock " << printMBBReference(*MBB)
367                         << '\n');
368       OldVarInfo.AliveBlocks.reset(BBNum);
369     }
370   }
371 
372   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
373   auto I = OldVarInfo.Kills.begin();
374   while (I != OldVarInfo.Kills.end()) {
375     if (ElseBlocks.contains((*I)->getParent())) {
376       NewVarInfo.Kills.push_back(*I);
377       I = OldVarInfo.Kills.erase(I);
378     } else {
379       ++I;
380     }
381   }
382 }
383 
384 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
385     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
386     MachineBasicBlock *Endif,
387     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
388   // Insert a new PHI, marking the value from the THEN region being
389   // undef.
390   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
391   const auto *RC = MRI->getRegClass(Reg);
392   Register NewReg = MRI->createVirtualRegister(RC);
393   Register UndefReg = MRI->createVirtualRegister(RC);
394   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
395                                     TII->get(TargetOpcode::PHI), NewReg);
396   for (auto *Pred : Flow->predecessors()) {
397     if (Pred == If)
398       PHI.addReg(Reg).addMBB(Pred);
399     else
400       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
401   }
402 
403   // Replace all uses in the ELSE region or the PHIs in ENDIF block
404   for (auto I = MRI->use_begin(Reg), E = MRI->use_end(); I != E;) {
405     MachineOperand &O = *I;
406     // This is a little bit tricky, the setReg() will update the linked list,
407     // so we have to increment the iterator before setReg() to avoid skipping
408     // some uses.
409     ++I;
410     auto *UseMI = O.getParent();
411     auto *UseBlock = UseMI->getParent();
412     // Replace uses in Endif block
413     if (UseBlock == Endif) {
414       assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
415       O.setReg(NewReg);
416       continue;
417     }
418 
419     // Replace uses in Else region
420     if (ElseBlocks.contains(UseBlock))
421       O.setReg(NewReg);
422   }
423 
424   // The optimized Reg is not alive through Flow blocks anymore.
425   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
426   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
427 
428   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
429   updateLiveRangeInThenRegion(Reg, If, Flow);
430 }
431 
432 char SIOptimizeVGPRLiveRange::ID = 0;
433 
434 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
435                       "SI Optimize VGPR LiveRange", false, false)
436 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
437 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
438 INITIALIZE_PASS_DEPENDENCY(LiveVariables)
439 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
440                     "SI Optimize VGPR LiveRange", false, false)
441 
442 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
443 
444 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
445   return new SIOptimizeVGPRLiveRange();
446 }
447 
448 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
449 
450   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
451   TII = ST.getInstrInfo();
452   TRI = &TII->getRegisterInfo();
453   MDT = &getAnalysis<MachineDominatorTree>();
454   Loops = &getAnalysis<MachineLoopInfo>();
455   LV = &getAnalysis<LiveVariables>();
456   MRI = &MF.getRegInfo();
457 
458   if (skipFunction(MF.getFunction()))
459     return false;
460 
461   bool MadeChange = false;
462 
463   // TODO: we need to think about the order of visiting the blocks to get
464   // optimal result for nesting if-else cases.
465   for (MachineBasicBlock &MBB : MF) {
466     for (auto &MI : MBB.terminators()) {
467       // Detect the if-else blocks
468       if (MI.getOpcode() == AMDGPU::SI_IF) {
469         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
470         auto *Endif = getElseTarget(IfTarget);
471         if (!Endif)
472           continue;
473 
474         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
475         SmallVector<Register> CandidateRegs;
476 
477         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
478                           << printMBBReference(MBB) << ' '
479                           << printMBBReference(*IfTarget) << ' '
480                           << printMBBReference(*Endif) << '\n');
481 
482         // Collect all the blocks in the ELSE region
483         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
484 
485         // Collect the registers can be optimized
486         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
487                                   CandidateRegs);
488         MadeChange |= !CandidateRegs.empty();
489         // Now we are safe to optimize.
490         for (auto Reg : CandidateRegs)
491           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
492       }
493     }
494   }
495 
496   return MadeChange;
497 }
498