1208332deSRuiling Song //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2208332deSRuiling Song //
3208332deSRuiling Song // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4208332deSRuiling Song // See https://llvm.org/LICENSE.txt for license information.
5208332deSRuiling Song // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6208332deSRuiling Song //
7208332deSRuiling Song //===----------------------------------------------------------------------===//
8208332deSRuiling Song //
9208332deSRuiling Song /// \file
10ad2c66ecSSebastian Neubauer /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11ad2c66ecSSebastian Neubauer /// structures and waterfall loops.
12208332deSRuiling Song ///
13ad2c66ecSSebastian Neubauer /// When we do structurization, we usually transform an if-else into two
14d1f45ed5SNeubauer, Sebastian /// successive if-then (with a flow block to do predicate inversion). Consider a
15208332deSRuiling Song /// simple case after structurization: A divergent value %a was defined before
16208332deSRuiling Song /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17208332deSRuiling Song ///    bb.if:
18208332deSRuiling Song ///      %a = ...
19208332deSRuiling Song ///      ...
20208332deSRuiling Song ///    bb.then:
21208332deSRuiling Song ///      ... = op %a
22208332deSRuiling Song ///      ... // %a can be dead here
23208332deSRuiling Song ///    bb.flow:
24208332deSRuiling Song ///      ...
25208332deSRuiling Song ///    bb.else:
26208332deSRuiling Song ///      ... = %a
27208332deSRuiling Song ///      ...
28208332deSRuiling Song ///    bb.endif
29208332deSRuiling Song ///
30208332deSRuiling Song ///  As register allocator has no idea of the thread-control-flow, it will just
31208332deSRuiling Song ///  assume %a would be alive in the whole range of bb.then because of a later
32ad2c66ecSSebastian Neubauer ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33208332deSRuiling Song ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34ad2c66ecSSebastian Neubauer ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35ad2c66ecSSebastian Neubauer ///  after the last use in bb.then until the end of the block. The reason is
36208332deSRuiling Song ///  the instructions in bb.then will only overwrite lanes that will never be
37208332deSRuiling Song ///  accessed in bb.else.
38208332deSRuiling Song ///
39208332deSRuiling Song ///  This pass aims to to tell register allocator that %a is in-fact dead,
40208332deSRuiling Song ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41208332deSRuiling Song ///  from bb.then, and then replace the uses in the bb.else with the result of
42208332deSRuiling Song ///  newly inserted phi.
43208332deSRuiling Song ///
44208332deSRuiling Song ///  Two key conditions must be met to ensure correctness:
45208332deSRuiling Song ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46208332deSRuiling Song ///      sure the second loop iteration still get correct data.
47208332deSRuiling Song ///  2.) There should be no further uses after the IF-ELSE region.
48208332deSRuiling Song ///
49ad2c66ecSSebastian Neubauer ///
50ad2c66ecSSebastian Neubauer /// Waterfall loops get inserted around instructions that use divergent values
51ad2c66ecSSebastian Neubauer /// but can only be executed with a uniform value. For example an indirect call
52ad2c66ecSSebastian Neubauer /// to a divergent address:
53ad2c66ecSSebastian Neubauer ///    bb.start:
54ad2c66ecSSebastian Neubauer ///      %a = ...
55ad2c66ecSSebastian Neubauer ///      %fun = ...
56ad2c66ecSSebastian Neubauer ///      ...
57ad2c66ecSSebastian Neubauer ///    bb.loop:
58ad2c66ecSSebastian Neubauer ///      call %fun (%a)
59ad2c66ecSSebastian Neubauer ///      ... // %a can be dead here
60ad2c66ecSSebastian Neubauer ///      loop %bb.loop
61ad2c66ecSSebastian Neubauer ///
62ad2c66ecSSebastian Neubauer ///  The loop block is executed multiple times, but it is run exactly once for
63ad2c66ecSSebastian Neubauer ///  each active lane. Similar to the if-else case, the register allocator
64ad2c66ecSSebastian Neubauer ///  assumes that %a is live throughout the loop as it is used again in the next
65ad2c66ecSSebastian Neubauer ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66ad2c66ecSSebastian Neubauer ///  to be live after its last use in the loop block. By inserting a phi-node at
67ad2c66ecSSebastian Neubauer ///  the start of bb.loop that is undef when coming from bb.loop, the register
68ad2c66ecSSebastian Neubauer ///  allocation knows that the value of %a does not need to be preserved through
69ad2c66ecSSebastian Neubauer ///  iterations of the loop.
70ad2c66ecSSebastian Neubauer ///
71208332deSRuiling Song //
72208332deSRuiling Song //===----------------------------------------------------------------------===//
73208332deSRuiling Song 
74208332deSRuiling Song #include "AMDGPU.h"
75208332deSRuiling Song #include "GCNSubtarget.h"
76208332deSRuiling Song #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77208332deSRuiling Song #include "SIMachineFunctionInfo.h"
78208332deSRuiling Song #include "llvm/CodeGen/LiveVariables.h"
79208332deSRuiling Song #include "llvm/CodeGen/MachineDominators.h"
80208332deSRuiling Song #include "llvm/CodeGen/MachineLoopInfo.h"
81208332deSRuiling Song #include "llvm/CodeGen/TargetRegisterInfo.h"
82208332deSRuiling Song #include "llvm/InitializePasses.h"
83208332deSRuiling Song 
84208332deSRuiling Song using namespace llvm;
85208332deSRuiling Song 
86208332deSRuiling Song #define DEBUG_TYPE "si-opt-vgpr-liverange"
87208332deSRuiling Song 
88208332deSRuiling Song namespace {
89208332deSRuiling Song 
90208332deSRuiling Song class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91208332deSRuiling Song private:
92208332deSRuiling Song   const SIRegisterInfo *TRI = nullptr;
93208332deSRuiling Song   const SIInstrInfo *TII = nullptr;
94208332deSRuiling Song   LiveVariables *LV = nullptr;
95208332deSRuiling Song   MachineDominatorTree *MDT = nullptr;
96208332deSRuiling Song   const MachineLoopInfo *Loops = nullptr;
97208332deSRuiling Song   MachineRegisterInfo *MRI = nullptr;
98208332deSRuiling Song 
99208332deSRuiling Song public:
100208332deSRuiling Song   static char ID;
101208332deSRuiling Song 
102208332deSRuiling Song   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103208332deSRuiling Song 
104208332deSRuiling Song   void collectElseRegionBlocks(MachineBasicBlock *Flow,
105208332deSRuiling Song                                MachineBasicBlock *Endif,
106208332deSRuiling Song                                SmallSetVector<MachineBasicBlock *, 16> &) const;
107208332deSRuiling Song 
108208332deSRuiling Song   void
109208332deSRuiling Song   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110208332deSRuiling Song                             MachineBasicBlock *Endif,
111208332deSRuiling Song                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112208332deSRuiling Song                             SmallVectorImpl<Register> &CandidateRegs) const;
113208332deSRuiling Song 
114ad2c66ecSSebastian Neubauer   void collectWaterfallCandidateRegisters(
1151f52d02cSCarl Ritson       MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
1161f52d02cSCarl Ritson       SmallSetVector<Register, 16> &CandidateRegs,
1171f52d02cSCarl Ritson       SmallSetVector<MachineBasicBlock *, 2> &Blocks,
1181f52d02cSCarl Ritson       SmallVectorImpl<MachineInstr *> &Instructions) const;
119ad2c66ecSSebastian Neubauer 
120208332deSRuiling Song   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121208332deSRuiling Song                              SmallVectorImpl<MachineInstr *> &Uses) const;
122208332deSRuiling Song 
123208332deSRuiling Song   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124208332deSRuiling Song                                    MachineBasicBlock *Flow) const;
125208332deSRuiling Song 
126208332deSRuiling Song   void updateLiveRangeInElseRegion(
127208332deSRuiling Song       Register Reg, Register NewReg, MachineBasicBlock *Flow,
128208332deSRuiling Song       MachineBasicBlock *Endif,
129208332deSRuiling Song       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130208332deSRuiling Song 
131208332deSRuiling Song   void
132208332deSRuiling Song   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133208332deSRuiling Song                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134208332deSRuiling Song                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135208332deSRuiling Song 
1361f52d02cSCarl Ritson   void optimizeWaterfallLiveRange(
1371f52d02cSCarl Ritson       Register Reg, MachineBasicBlock *LoopHeader,
1381f52d02cSCarl Ritson       SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
1391f52d02cSCarl Ritson       SmallVectorImpl<MachineInstr *> &Instructions) const;
140ad2c66ecSSebastian Neubauer 
SIOptimizeVGPRLiveRange()141208332deSRuiling Song   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142208332deSRuiling Song 
143208332deSRuiling Song   bool runOnMachineFunction(MachineFunction &MF) override;
144208332deSRuiling Song 
getPassName() const145208332deSRuiling Song   StringRef getPassName() const override {
146208332deSRuiling Song     return "SI Optimize VGPR LiveRange";
147208332deSRuiling Song   }
148208332deSRuiling Song 
getAnalysisUsage(AnalysisUsage & AU) const149208332deSRuiling Song   void getAnalysisUsage(AnalysisUsage &AU) const override {
150208332deSRuiling Song     AU.addRequired<LiveVariables>();
151208332deSRuiling Song     AU.addRequired<MachineDominatorTree>();
152208332deSRuiling Song     AU.addRequired<MachineLoopInfo>();
153208332deSRuiling Song     AU.addPreserved<LiveVariables>();
154208332deSRuiling Song     AU.addPreserved<MachineDominatorTree>();
155208332deSRuiling Song     AU.addPreserved<MachineLoopInfo>();
156208332deSRuiling Song     MachineFunctionPass::getAnalysisUsage(AU);
157208332deSRuiling Song   }
158208332deSRuiling Song 
getRequiredProperties() const159208332deSRuiling Song   MachineFunctionProperties getRequiredProperties() const override {
160208332deSRuiling Song     return MachineFunctionProperties().set(
161208332deSRuiling Song         MachineFunctionProperties::Property::IsSSA);
162208332deSRuiling Song   }
1634fc18de3SMatt Arsenault 
getClearedProperties() const1644fc18de3SMatt Arsenault   MachineFunctionProperties getClearedProperties() const override {
1654fc18de3SMatt Arsenault     return MachineFunctionProperties().set(
1664fc18de3SMatt Arsenault         MachineFunctionProperties::Property::NoPHIs);
1674fc18de3SMatt Arsenault   }
168208332deSRuiling Song };
169208332deSRuiling Song 
170208332deSRuiling Song } // end anonymous namespace
171208332deSRuiling Song 
172208332deSRuiling Song // Check whether the MBB is a else flow block and get the branching target which
173208332deSRuiling Song // is the Endif block
174208332deSRuiling Song MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175208332deSRuiling Song SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176208332deSRuiling Song   for (auto &BR : MBB->terminators()) {
177208332deSRuiling Song     if (BR.getOpcode() == AMDGPU::SI_ELSE)
178208332deSRuiling Song       return BR.getOperand(2).getMBB();
179208332deSRuiling Song   }
180208332deSRuiling Song   return nullptr;
181208332deSRuiling Song }
182208332deSRuiling Song 
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183208332deSRuiling Song void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184208332deSRuiling Song     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185208332deSRuiling Song     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186208332deSRuiling Song   assert(Flow != Endif);
187208332deSRuiling Song 
188208332deSRuiling Song   MachineBasicBlock *MBB = Endif;
189208332deSRuiling Song   unsigned Cur = 0;
190208332deSRuiling Song   while (MBB) {
191208332deSRuiling Song     for (auto *Pred : MBB->predecessors()) {
192208332deSRuiling Song       if (Pred != Flow && !Blocks.contains(Pred))
193208332deSRuiling Song         Blocks.insert(Pred);
194208332deSRuiling Song     }
195208332deSRuiling Song 
196208332deSRuiling Song     if (Cur < Blocks.size())
197208332deSRuiling Song       MBB = Blocks[Cur++];
198208332deSRuiling Song     else
199208332deSRuiling Song       MBB = nullptr;
200208332deSRuiling Song   }
201208332deSRuiling Song 
202b650778dSJordan Rupprecht   LLVM_DEBUG({
203b650778dSJordan Rupprecht     dbgs() << "Found Else blocks: ";
204208332deSRuiling Song     for (auto *MBB : Blocks)
205b650778dSJordan Rupprecht       dbgs() << printMBBReference(*MBB) << ' ';
206b650778dSJordan Rupprecht     dbgs() << '\n';
207b650778dSJordan Rupprecht   });
208208332deSRuiling Song }
209208332deSRuiling Song 
210208332deSRuiling Song /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211208332deSRuiling Song void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212208332deSRuiling Song     Register Reg, MachineBasicBlock *MBB,
213208332deSRuiling Song     SmallVectorImpl<MachineInstr *> &Uses) const {
214208332deSRuiling Song   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215208332deSRuiling Song     if (UseMI.getParent() == MBB && !UseMI.isPHI())
216208332deSRuiling Song       Uses.push_back(&UseMI);
217208332deSRuiling Song   }
218208332deSRuiling Song }
219208332deSRuiling Song 
220208332deSRuiling Song /// Collect the killed registers in the ELSE region which are not alive through
221208332deSRuiling Song /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222208332deSRuiling Song void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223208332deSRuiling Song     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224208332deSRuiling Song     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225208332deSRuiling Song     SmallVectorImpl<Register> &CandidateRegs) const {
226208332deSRuiling Song 
227208332deSRuiling Song   SmallSet<Register, 8> KillsInElse;
228208332deSRuiling Song 
229208332deSRuiling Song   for (auto *Else : ElseBlocks) {
230208332deSRuiling Song     for (auto &MI : Else->instrs()) {
231208332deSRuiling Song       if (MI.isDebugInstr())
232208332deSRuiling Song         continue;
233208332deSRuiling Song 
234208332deSRuiling Song       for (auto &MO : MI.operands()) {
235208332deSRuiling Song         if (!MO.isReg() || !MO.getReg() || MO.isDef())
236208332deSRuiling Song           continue;
237208332deSRuiling Song 
238208332deSRuiling Song         Register MOReg = MO.getReg();
239208332deSRuiling Song         // We can only optimize AGPR/VGPR virtual register
240208332deSRuiling Song         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241208332deSRuiling Song           continue;
242208332deSRuiling Song 
243b642d01fSSebastian Neubauer         if (MO.readsReg()) {
244208332deSRuiling Song           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245208332deSRuiling Song           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246208332deSRuiling Song           // Make sure two conditions are met:
247208332deSRuiling Song           // a.) the value is defined before/in the IF block
248208332deSRuiling Song           // b.) should be defined in the same loop-level.
249208332deSRuiling Song           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250b642d01fSSebastian Neubauer               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251b642d01fSSebastian Neubauer             // Check if the register is live into the endif block. If not,
252b642d01fSSebastian Neubauer             // consider it killed in the else region.
253b642d01fSSebastian Neubauer             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254b642d01fSSebastian Neubauer             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255208332deSRuiling Song               KillsInElse.insert(MOReg);
256b642d01fSSebastian Neubauer             } else {
257b642d01fSSebastian Neubauer               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258b642d01fSSebastian Neubauer                                 << " as Live in Endif\n");
259b642d01fSSebastian Neubauer             }
260b642d01fSSebastian Neubauer           }
261208332deSRuiling Song         }
262208332deSRuiling Song       }
263208332deSRuiling Song     }
264208332deSRuiling Song   }
265208332deSRuiling Song 
266208332deSRuiling Song   // Check the phis in the Endif, looking for value coming from the ELSE
267208332deSRuiling Song   // region. Make sure the phi-use is the last use.
268208332deSRuiling Song   for (auto &MI : Endif->phis()) {
269208332deSRuiling Song     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270208332deSRuiling Song       auto &MO = MI.getOperand(Idx);
271208332deSRuiling Song       auto *Pred = MI.getOperand(Idx + 1).getMBB();
272208332deSRuiling Song       if (Pred == Flow)
273208332deSRuiling Song         continue;
274208332deSRuiling Song       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275208332deSRuiling Song 
276208332deSRuiling Song       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277208332deSRuiling Song         continue;
278208332deSRuiling Song 
279208332deSRuiling Song       Register Reg = MO.getReg();
280208332deSRuiling Song       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281208332deSRuiling Song         continue;
282208332deSRuiling Song 
283208332deSRuiling Song       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284208332deSRuiling Song 
285208332deSRuiling Song       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286208332deSRuiling Song         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287208332deSRuiling Song                           << " as Live in Endif\n");
288208332deSRuiling Song         continue;
289208332deSRuiling Song       }
290208332deSRuiling Song       // Make sure two conditions are met:
291208332deSRuiling Song       // a.) the value is defined before/in the IF block
292208332deSRuiling Song       // b.) should be defined in the same loop-level.
293208332deSRuiling Song       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294208332deSRuiling Song       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295208332deSRuiling Song           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296208332deSRuiling Song         KillsInElse.insert(Reg);
297208332deSRuiling Song     }
298208332deSRuiling Song   }
299208332deSRuiling Song 
300208332deSRuiling Song   auto IsLiveThroughThen = [&](Register Reg) {
301208332deSRuiling Song     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302208332deSRuiling Song          ++I) {
303208332deSRuiling Song       if (!I->readsReg())
304208332deSRuiling Song         continue;
305208332deSRuiling Song       auto *UseMI = I->getParent();
306208332deSRuiling Song       auto *UseMBB = UseMI->getParent();
307208332deSRuiling Song       if (UseMBB == Flow || UseMBB == Endif) {
308208332deSRuiling Song         if (!UseMI->isPHI())
309208332deSRuiling Song           return true;
310208332deSRuiling Song 
311208332deSRuiling Song         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312208332deSRuiling Song         // The register is live through the path If->Flow or Flow->Endif.
313208332deSRuiling Song         // we should not optimize for such cases.
314208332deSRuiling Song         if ((UseMBB == Flow && IncomingMBB != If) ||
315208332deSRuiling Song             (UseMBB == Endif && IncomingMBB == Flow))
316208332deSRuiling Song           return true;
317208332deSRuiling Song       }
318208332deSRuiling Song     }
319208332deSRuiling Song     return false;
320208332deSRuiling Song   };
321208332deSRuiling Song 
322208332deSRuiling Song   for (auto Reg : KillsInElse) {
323208332deSRuiling Song     if (!IsLiveThroughThen(Reg))
324208332deSRuiling Song       CandidateRegs.push_back(Reg);
325208332deSRuiling Song   }
326208332deSRuiling Song }
327208332deSRuiling Song 
328ad2c66ecSSebastian Neubauer /// Collect the registers used in the waterfall loop block that are defined
329ad2c66ecSSebastian Neubauer /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330ad2c66ecSSebastian Neubauer void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
3311f52d02cSCarl Ritson     MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
3321f52d02cSCarl Ritson     SmallSetVector<Register, 16> &CandidateRegs,
3331f52d02cSCarl Ritson     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
3341f52d02cSCarl Ritson     SmallVectorImpl<MachineInstr *> &Instructions) const {
335ad2c66ecSSebastian Neubauer 
3361f52d02cSCarl Ritson   // Collect loop instructions, potentially spanning multiple blocks
3371f52d02cSCarl Ritson   auto *MBB = LoopHeader;
3381f52d02cSCarl Ritson   for (;;) {
3391f52d02cSCarl Ritson     Blocks.insert(MBB);
3401f52d02cSCarl Ritson     for (auto &MI : *MBB) {
341ad2c66ecSSebastian Neubauer       if (MI.isDebugInstr())
342ad2c66ecSSebastian Neubauer         continue;
3431f52d02cSCarl Ritson       Instructions.push_back(&MI);
3441f52d02cSCarl Ritson     }
3451f52d02cSCarl Ritson     if (MBB == LoopEnd)
3461f52d02cSCarl Ritson       break;
3472bca7d85SCarl Ritson 
3482bca7d85SCarl Ritson     if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
3492bca7d85SCarl Ritson         (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
3502bca7d85SCarl Ritson       LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
3512bca7d85SCarl Ritson       return;
3522bca7d85SCarl Ritson     }
3532bca7d85SCarl Ritson 
3541f52d02cSCarl Ritson     MBB = *MBB->succ_begin();
3551f52d02cSCarl Ritson   }
3561f52d02cSCarl Ritson 
3571f52d02cSCarl Ritson   for (auto *I : Instructions) {
3581f52d02cSCarl Ritson     auto &MI = *I;
359ad2c66ecSSebastian Neubauer 
360ad2c66ecSSebastian Neubauer     for (auto &MO : MI.operands()) {
361ad2c66ecSSebastian Neubauer       if (!MO.isReg() || !MO.getReg() || MO.isDef())
362ad2c66ecSSebastian Neubauer         continue;
363ad2c66ecSSebastian Neubauer 
364ad2c66ecSSebastian Neubauer       Register MOReg = MO.getReg();
365ad2c66ecSSebastian Neubauer       // We can only optimize AGPR/VGPR virtual register
366ad2c66ecSSebastian Neubauer       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367ad2c66ecSSebastian Neubauer         continue;
368ad2c66ecSSebastian Neubauer 
369ad2c66ecSSebastian Neubauer       if (MO.readsReg()) {
3701f52d02cSCarl Ritson         MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371ad2c66ecSSebastian Neubauer         // Make sure the value is defined before the LOOP block
3721f52d02cSCarl Ritson         if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373ad2c66ecSSebastian Neubauer           // If the variable is used after the loop, the register coalescer will
374ad2c66ecSSebastian Neubauer           // merge the newly created register and remove the phi node again.
375ad2c66ecSSebastian Neubauer           // Just do nothing in that case.
376ad2c66ecSSebastian Neubauer           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377ad2c66ecSSebastian Neubauer           bool IsUsed = false;
3781f52d02cSCarl Ritson           for (auto *Succ : LoopEnd->successors()) {
3791f52d02cSCarl Ritson             if (!Blocks.contains(Succ) &&
3801f52d02cSCarl Ritson                 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381ad2c66ecSSebastian Neubauer               IsUsed = true;
382ad2c66ecSSebastian Neubauer               break;
383ad2c66ecSSebastian Neubauer             }
384ad2c66ecSSebastian Neubauer           }
385ad2c66ecSSebastian Neubauer           if (!IsUsed) {
386ad2c66ecSSebastian Neubauer             LLVM_DEBUG(dbgs() << "Found candidate reg: "
387ad2c66ecSSebastian Neubauer                               << printReg(MOReg, TRI, 0, MRI) << '\n');
388ad2c66ecSSebastian Neubauer             CandidateRegs.insert(MOReg);
389ad2c66ecSSebastian Neubauer           } else {
390ad2c66ecSSebastian Neubauer             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391ad2c66ecSSebastian Neubauer                               << printReg(MOReg, TRI, 0, MRI) << '\n');
392ad2c66ecSSebastian Neubauer           }
393ad2c66ecSSebastian Neubauer         }
394ad2c66ecSSebastian Neubauer       }
395ad2c66ecSSebastian Neubauer     }
396ad2c66ecSSebastian Neubauer   }
397ad2c66ecSSebastian Neubauer }
398ad2c66ecSSebastian Neubauer 
399208332deSRuiling Song // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400208332deSRuiling Song void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401208332deSRuiling Song     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
402e09f98a6SMatt Arsenault   SetVector<MachineBasicBlock *> Blocks;
403e09f98a6SMatt Arsenault   SmallVector<MachineBasicBlock *> WorkList({If});
404208332deSRuiling Song 
405e09f98a6SMatt Arsenault   // Collect all successors until we see the flow block, where we should
406e09f98a6SMatt Arsenault   // reconverge.
407e09f98a6SMatt Arsenault   while (!WorkList.empty()) {
408e09f98a6SMatt Arsenault     auto *MBB = WorkList.pop_back_val();
409e09f98a6SMatt Arsenault     for (auto *Succ : MBB->successors()) {
410e09f98a6SMatt Arsenault       if (Succ != Flow && !Blocks.contains(Succ)) {
411e09f98a6SMatt Arsenault         WorkList.push_back(Succ);
412e09f98a6SMatt Arsenault         Blocks.insert(Succ);
413208332deSRuiling Song       }
414208332deSRuiling Song     }
415e09f98a6SMatt Arsenault   }
416208332deSRuiling Song 
417208332deSRuiling Song   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
418e09f98a6SMatt Arsenault   for (MachineBasicBlock *MBB : Blocks) {
419208332deSRuiling Song     // Clear Live bit, as we will recalculate afterwards
420208332deSRuiling Song     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421208332deSRuiling Song                       << '\n');
422208332deSRuiling Song     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423208332deSRuiling Song   }
424208332deSRuiling Song 
425e09f98a6SMatt Arsenault   SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
426e09f98a6SMatt Arsenault 
427208332deSRuiling Song   // Get the blocks the Reg should be alive through
428208332deSRuiling Song   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429208332deSRuiling Song        ++I) {
430208332deSRuiling Song     auto *UseMI = I->getParent();
431208332deSRuiling Song     if (UseMI->isPHI() && I->readsReg()) {
432e09f98a6SMatt Arsenault       if (Blocks.contains(UseMI->getParent()))
433208332deSRuiling Song         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434208332deSRuiling Song     }
435208332deSRuiling Song   }
436208332deSRuiling Song 
437e09f98a6SMatt Arsenault   for (MachineBasicBlock *MBB : Blocks) {
438208332deSRuiling Song     SmallVector<MachineInstr *> Uses;
439208332deSRuiling Song     // PHI instructions has been processed before.
440208332deSRuiling Song     findNonPHIUsesInBlock(Reg, MBB, Uses);
441208332deSRuiling Song 
442208332deSRuiling Song     if (Uses.size() == 1) {
443208332deSRuiling Song       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444208332deSRuiling Song                         << printMBBReference(*MBB) << '\n');
445208332deSRuiling Song       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446208332deSRuiling Song     } else if (Uses.size() > 1) {
447208332deSRuiling Song       // Process the instructions in-order
448208332deSRuiling Song       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449208332deSRuiling Song                         << printMBBReference(*MBB) << '\n');
450208332deSRuiling Song       for (MachineInstr &MI : *MBB) {
451208332deSRuiling Song         if (llvm::is_contained(Uses, &MI))
452208332deSRuiling Song           LV->HandleVirtRegUse(Reg, MBB, MI);
453208332deSRuiling Song       }
454208332deSRuiling Song     }
455208332deSRuiling Song 
456208332deSRuiling Song     // Mark Reg alive through the block if this is a PHI incoming block
457208332deSRuiling Song     if (PHIIncoming.contains(MBB))
458208332deSRuiling Song       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459208332deSRuiling Song                                   MBB);
460208332deSRuiling Song   }
461208332deSRuiling Song 
462208332deSRuiling Song   // Set the isKilled flag if we get new Kills in the THEN region.
463208332deSRuiling Song   for (auto *MI : OldVarInfo.Kills) {
464e09f98a6SMatt Arsenault     if (Blocks.contains(MI->getParent()))
465208332deSRuiling Song       MI->addRegisterKilled(Reg, TRI);
466208332deSRuiling Song   }
467208332deSRuiling Song }
468208332deSRuiling Song 
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469208332deSRuiling Song void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470208332deSRuiling Song     Register Reg, Register NewReg, MachineBasicBlock *Flow,
471208332deSRuiling Song     MachineBasicBlock *Endif,
472208332deSRuiling Song     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473208332deSRuiling Song   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474208332deSRuiling Song   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475208332deSRuiling Song 
476208332deSRuiling Song   // Transfer aliveBlocks from Reg to NewReg
477208332deSRuiling Song   for (auto *MBB : ElseBlocks) {
478208332deSRuiling Song     unsigned BBNum = MBB->getNumber();
479208332deSRuiling Song     if (OldVarInfo.AliveBlocks.test(BBNum)) {
480208332deSRuiling Song       NewVarInfo.AliveBlocks.set(BBNum);
4819ced1e44SSebastian Neubauer       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482208332deSRuiling Song                         << '\n');
483208332deSRuiling Song       OldVarInfo.AliveBlocks.reset(BBNum);
484208332deSRuiling Song     }
485208332deSRuiling Song   }
486208332deSRuiling Song 
487208332deSRuiling Song   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488208332deSRuiling Song   auto I = OldVarInfo.Kills.begin();
489208332deSRuiling Song   while (I != OldVarInfo.Kills.end()) {
490208332deSRuiling Song     if (ElseBlocks.contains((*I)->getParent())) {
491208332deSRuiling Song       NewVarInfo.Kills.push_back(*I);
492208332deSRuiling Song       I = OldVarInfo.Kills.erase(I);
493208332deSRuiling Song     } else {
494208332deSRuiling Song       ++I;
495208332deSRuiling Song     }
496208332deSRuiling Song   }
497208332deSRuiling Song }
498208332deSRuiling Song 
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499208332deSRuiling Song void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500208332deSRuiling Song     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501208332deSRuiling Song     MachineBasicBlock *Endif,
502208332deSRuiling Song     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503208332deSRuiling Song   // Insert a new PHI, marking the value from the THEN region being
504208332deSRuiling Song   // undef.
505208332deSRuiling Song   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506208332deSRuiling Song   const auto *RC = MRI->getRegClass(Reg);
507208332deSRuiling Song   Register NewReg = MRI->createVirtualRegister(RC);
508208332deSRuiling Song   Register UndefReg = MRI->createVirtualRegister(RC);
509208332deSRuiling Song   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510208332deSRuiling Song                                     TII->get(TargetOpcode::PHI), NewReg);
511208332deSRuiling Song   for (auto *Pred : Flow->predecessors()) {
512208332deSRuiling Song     if (Pred == If)
513208332deSRuiling Song       PHI.addReg(Reg).addMBB(Pred);
514208332deSRuiling Song     else
515208332deSRuiling Song       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516208332deSRuiling Song   }
517208332deSRuiling Song 
518208332deSRuiling Song   // Replace all uses in the ELSE region or the PHIs in ENDIF block
519ad2c66ecSSebastian Neubauer   // Use early increment range because setReg() will update the linked list.
520ad2c66ecSSebastian Neubauer   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521208332deSRuiling Song     auto *UseMI = O.getParent();
522208332deSRuiling Song     auto *UseBlock = UseMI->getParent();
523208332deSRuiling Song     // Replace uses in Endif block
524208332deSRuiling Song     if (UseBlock == Endif) {
525208332deSRuiling Song       assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
526208332deSRuiling Song       O.setReg(NewReg);
527208332deSRuiling Song       continue;
528208332deSRuiling Song     }
529208332deSRuiling Song 
530208332deSRuiling Song     // Replace uses in Else region
531208332deSRuiling Song     if (ElseBlocks.contains(UseBlock))
532208332deSRuiling Song       O.setReg(NewReg);
533208332deSRuiling Song   }
534208332deSRuiling Song 
535208332deSRuiling Song   // The optimized Reg is not alive through Flow blocks anymore.
536208332deSRuiling Song   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
537208332deSRuiling Song   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
538208332deSRuiling Song 
539208332deSRuiling Song   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
540208332deSRuiling Song   updateLiveRangeInThenRegion(Reg, If, Flow);
541208332deSRuiling Song }
542208332deSRuiling Song 
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const543ad2c66ecSSebastian Neubauer void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
5441f52d02cSCarl Ritson     Register Reg, MachineBasicBlock *LoopHeader,
5451f52d02cSCarl Ritson     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
5461f52d02cSCarl Ritson     SmallVectorImpl<MachineInstr *> &Instructions) const {
547ad2c66ecSSebastian Neubauer   // Insert a new PHI, marking the value from the last loop iteration undef.
548ad2c66ecSSebastian Neubauer   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
549ad2c66ecSSebastian Neubauer   const auto *RC = MRI->getRegClass(Reg);
550ad2c66ecSSebastian Neubauer   Register NewReg = MRI->createVirtualRegister(RC);
551ad2c66ecSSebastian Neubauer   Register UndefReg = MRI->createVirtualRegister(RC);
552ad2c66ecSSebastian Neubauer 
553ad2c66ecSSebastian Neubauer   // Replace all uses in the LOOP region
554ad2c66ecSSebastian Neubauer   // Use early increment range because setReg() will update the linked list.
555ad2c66ecSSebastian Neubauer   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
556ad2c66ecSSebastian Neubauer     auto *UseMI = O.getParent();
557ad2c66ecSSebastian Neubauer     auto *UseBlock = UseMI->getParent();
5581f52d02cSCarl Ritson     // Replace uses in Loop blocks
5591f52d02cSCarl Ritson     if (Blocks.contains(UseBlock))
560ad2c66ecSSebastian Neubauer       O.setReg(NewReg);
561ad2c66ecSSebastian Neubauer   }
562ad2c66ecSSebastian Neubauer 
5631f52d02cSCarl Ritson   MachineInstrBuilder PHI =
5641f52d02cSCarl Ritson       BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
565ad2c66ecSSebastian Neubauer               TII->get(TargetOpcode::PHI), NewReg);
5661f52d02cSCarl Ritson   for (auto *Pred : LoopHeader->predecessors()) {
5671f52d02cSCarl Ritson     if (Blocks.contains(Pred))
568ad2c66ecSSebastian Neubauer       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
569ad2c66ecSSebastian Neubauer     else
570ad2c66ecSSebastian Neubauer       PHI.addReg(Reg).addMBB(Pred);
571ad2c66ecSSebastian Neubauer   }
572ad2c66ecSSebastian Neubauer 
573ad2c66ecSSebastian Neubauer   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
574ad2c66ecSSebastian Neubauer   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
575ad2c66ecSSebastian Neubauer 
5761f52d02cSCarl Ritson   // Find last use and mark as kill
5771f52d02cSCarl Ritson   MachineInstr *Kill = nullptr;
5781f52d02cSCarl Ritson   for (auto *MI : reverse(Instructions)) {
5791f52d02cSCarl Ritson     if (MI->readsRegister(NewReg, TRI)) {
5801f52d02cSCarl Ritson       MI->addRegisterKilled(NewReg, TRI);
5811f52d02cSCarl Ritson       NewVarInfo.Kills.push_back(MI);
5821f52d02cSCarl Ritson       Kill = MI;
583ad2c66ecSSebastian Neubauer       break;
584ad2c66ecSSebastian Neubauer     }
585ad2c66ecSSebastian Neubauer   }
5861f52d02cSCarl Ritson   assert(Kill && "Failed to find last usage of register in loop");
5871f52d02cSCarl Ritson 
5881f52d02cSCarl Ritson   MachineBasicBlock *KillBlock = Kill->getParent();
5891f52d02cSCarl Ritson   bool PostKillBlock = false;
5901f52d02cSCarl Ritson   for (auto *Block : Blocks) {
5911f52d02cSCarl Ritson     auto BBNum = Block->getNumber();
5921f52d02cSCarl Ritson 
5931f52d02cSCarl Ritson     // collectWaterfallCandidateRegisters only collects registers that are dead
5941f52d02cSCarl Ritson     // after the loop. So we know that the old reg is no longer live throughout
5951f52d02cSCarl Ritson     // the waterfall loop.
5961f52d02cSCarl Ritson     OldVarInfo.AliveBlocks.reset(BBNum);
5971f52d02cSCarl Ritson 
5981f52d02cSCarl Ritson     // The new register is live up to (and including) the block that kills it.
5991f52d02cSCarl Ritson     PostKillBlock |= (Block == KillBlock);
6001f52d02cSCarl Ritson     if (PostKillBlock) {
6011f52d02cSCarl Ritson       NewVarInfo.AliveBlocks.reset(BBNum);
6021f52d02cSCarl Ritson     } else if (Block != LoopHeader) {
6031f52d02cSCarl Ritson       NewVarInfo.AliveBlocks.set(BBNum);
6041f52d02cSCarl Ritson     }
6051f52d02cSCarl Ritson   }
606ad2c66ecSSebastian Neubauer }
607ad2c66ecSSebastian Neubauer 
608208332deSRuiling Song char SIOptimizeVGPRLiveRange::ID = 0;
609208332deSRuiling Song 
610208332deSRuiling Song INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
611208332deSRuiling Song                       "SI Optimize VGPR LiveRange", false, false)
612208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
613208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
614208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(LiveVariables)
615208332deSRuiling Song INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
616208332deSRuiling Song                     "SI Optimize VGPR LiveRange", false, false)
617208332deSRuiling Song 
618208332deSRuiling Song char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
619208332deSRuiling Song 
createSIOptimizeVGPRLiveRangePass()620208332deSRuiling Song FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
621208332deSRuiling Song   return new SIOptimizeVGPRLiveRange();
622208332deSRuiling Song }
623208332deSRuiling Song 
runOnMachineFunction(MachineFunction & MF)624208332deSRuiling Song bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
625208332deSRuiling Song 
626208332deSRuiling Song   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
627208332deSRuiling Song   TII = ST.getInstrInfo();
628208332deSRuiling Song   TRI = &TII->getRegisterInfo();
629208332deSRuiling Song   MDT = &getAnalysis<MachineDominatorTree>();
630208332deSRuiling Song   Loops = &getAnalysis<MachineLoopInfo>();
631208332deSRuiling Song   LV = &getAnalysis<LiveVariables>();
632208332deSRuiling Song   MRI = &MF.getRegInfo();
633208332deSRuiling Song 
634208332deSRuiling Song   if (skipFunction(MF.getFunction()))
635208332deSRuiling Song     return false;
636208332deSRuiling Song 
637208332deSRuiling Song   bool MadeChange = false;
638208332deSRuiling Song 
639208332deSRuiling Song   // TODO: we need to think about the order of visiting the blocks to get
640208332deSRuiling Song   // optimal result for nesting if-else cases.
641208332deSRuiling Song   for (MachineBasicBlock &MBB : MF) {
642208332deSRuiling Song     for (auto &MI : MBB.terminators()) {
643208332deSRuiling Song       // Detect the if-else blocks
644208332deSRuiling Song       if (MI.getOpcode() == AMDGPU::SI_IF) {
645208332deSRuiling Song         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
646208332deSRuiling Song         auto *Endif = getElseTarget(IfTarget);
647208332deSRuiling Song         if (!Endif)
648208332deSRuiling Song           continue;
649208332deSRuiling Song 
650*4dcb42faSRuiling Song         // Skip unexpected control flow.
651*4dcb42faSRuiling Song         if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
652*4dcb42faSRuiling Song           continue;
653*4dcb42faSRuiling Song 
654208332deSRuiling Song         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
655208332deSRuiling Song         SmallVector<Register> CandidateRegs;
656208332deSRuiling Song 
657208332deSRuiling Song         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
658208332deSRuiling Song                           << printMBBReference(MBB) << ' '
659208332deSRuiling Song                           << printMBBReference(*IfTarget) << ' '
660208332deSRuiling Song                           << printMBBReference(*Endif) << '\n');
661208332deSRuiling Song 
662208332deSRuiling Song         // Collect all the blocks in the ELSE region
663208332deSRuiling Song         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
664208332deSRuiling Song 
665208332deSRuiling Song         // Collect the registers can be optimized
666208332deSRuiling Song         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
667208332deSRuiling Song                                   CandidateRegs);
668208332deSRuiling Song         MadeChange |= !CandidateRegs.empty();
669208332deSRuiling Song         // Now we are safe to optimize.
670208332deSRuiling Song         for (auto Reg : CandidateRegs)
671208332deSRuiling Song           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
672ad2c66ecSSebastian Neubauer       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
6731f52d02cSCarl Ritson         auto *LoopHeader = MI.getOperand(0).getMBB();
6741f52d02cSCarl Ritson         auto *LoopEnd = &MBB;
6751f52d02cSCarl Ritson 
676ad2c66ecSSebastian Neubauer         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
6771f52d02cSCarl Ritson                           << printMBBReference(*LoopHeader) << '\n');
678ad2c66ecSSebastian Neubauer 
679ad2c66ecSSebastian Neubauer         SmallSetVector<Register, 16> CandidateRegs;
6801f52d02cSCarl Ritson         SmallVector<MachineInstr *, 16> Instructions;
6811f52d02cSCarl Ritson         SmallSetVector<MachineBasicBlock *, 2> Blocks;
6821f52d02cSCarl Ritson 
6831f52d02cSCarl Ritson         collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
6841f52d02cSCarl Ritson                                            Blocks, Instructions);
685ad2c66ecSSebastian Neubauer         MadeChange |= !CandidateRegs.empty();
686ad2c66ecSSebastian Neubauer         // Now we are safe to optimize.
687ad2c66ecSSebastian Neubauer         for (auto Reg : CandidateRegs)
6881f52d02cSCarl Ritson           optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
689208332deSRuiling Song       }
690208332deSRuiling Song     }
691208332deSRuiling Song   }
692208332deSRuiling Song 
693208332deSRuiling Song   return MadeChange;
694208332deSRuiling Song }
695