1208332deSRuiling Song //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2208332deSRuiling Song //
3208332deSRuiling Song // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4208332deSRuiling Song // See https://llvm.org/LICENSE.txt for license information.
5208332deSRuiling Song // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6208332deSRuiling Song //
7208332deSRuiling Song //===----------------------------------------------------------------------===//
8208332deSRuiling Song //
9208332deSRuiling Song /// \file
10ad2c66ecSSebastian Neubauer /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11ad2c66ecSSebastian Neubauer /// structures and waterfall loops.
12208332deSRuiling Song ///
13ad2c66ecSSebastian Neubauer /// When we do structurization, we usually transform an if-else into two
14d1f45ed5SNeubauer, Sebastian /// successive if-then (with a flow block to do predicate inversion). Consider a
15208332deSRuiling Song /// simple case after structurization: A divergent value %a was defined before
16208332deSRuiling Song /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17208332deSRuiling Song /// bb.if:
18208332deSRuiling Song /// %a = ...
19208332deSRuiling Song /// ...
20208332deSRuiling Song /// bb.then:
21208332deSRuiling Song /// ... = op %a
22208332deSRuiling Song /// ... // %a can be dead here
23208332deSRuiling Song /// bb.flow:
24208332deSRuiling Song /// ...
25208332deSRuiling Song /// bb.else:
26208332deSRuiling Song /// ... = %a
27208332deSRuiling Song /// ...
28208332deSRuiling Song /// bb.endif
29208332deSRuiling Song ///
30208332deSRuiling Song /// As register allocator has no idea of the thread-control-flow, it will just
31208332deSRuiling Song /// assume %a would be alive in the whole range of bb.then because of a later
32ad2c66ecSSebastian Neubauer /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33208332deSRuiling Song /// to exec mask. For this if-else case, the lanes active in bb.then will be
34ad2c66ecSSebastian Neubauer /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35ad2c66ecSSebastian Neubauer /// after the last use in bb.then until the end of the block. The reason is
36208332deSRuiling Song /// the instructions in bb.then will only overwrite lanes that will never be
37208332deSRuiling Song /// accessed in bb.else.
38208332deSRuiling Song ///
39208332deSRuiling Song /// This pass aims to to tell register allocator that %a is in-fact dead,
40208332deSRuiling Song /// through inserting a phi-node in bb.flow saying that %a is undef when coming
41208332deSRuiling Song /// from bb.then, and then replace the uses in the bb.else with the result of
42208332deSRuiling Song /// newly inserted phi.
43208332deSRuiling Song ///
44208332deSRuiling Song /// Two key conditions must be met to ensure correctness:
45208332deSRuiling Song /// 1.) The def-point should be in the same loop-level as if-else-endif to make
46208332deSRuiling Song /// sure the second loop iteration still get correct data.
47208332deSRuiling Song /// 2.) There should be no further uses after the IF-ELSE region.
48208332deSRuiling Song ///
49ad2c66ecSSebastian Neubauer ///
50ad2c66ecSSebastian Neubauer /// Waterfall loops get inserted around instructions that use divergent values
51ad2c66ecSSebastian Neubauer /// but can only be executed with a uniform value. For example an indirect call
52ad2c66ecSSebastian Neubauer /// to a divergent address:
53ad2c66ecSSebastian Neubauer /// bb.start:
54ad2c66ecSSebastian Neubauer /// %a = ...
55ad2c66ecSSebastian Neubauer /// %fun = ...
56ad2c66ecSSebastian Neubauer /// ...
57ad2c66ecSSebastian Neubauer /// bb.loop:
58ad2c66ecSSebastian Neubauer /// call %fun (%a)
59ad2c66ecSSebastian Neubauer /// ... // %a can be dead here
60ad2c66ecSSebastian Neubauer /// loop %bb.loop
61ad2c66ecSSebastian Neubauer ///
62ad2c66ecSSebastian Neubauer /// The loop block is executed multiple times, but it is run exactly once for
63ad2c66ecSSebastian Neubauer /// each active lane. Similar to the if-else case, the register allocator
64ad2c66ecSSebastian Neubauer /// assumes that %a is live throughout the loop as it is used again in the next
65ad2c66ecSSebastian Neubauer /// iteration. If %a is a VGPR that is unused after the loop, it does not need
66ad2c66ecSSebastian Neubauer /// to be live after its last use in the loop block. By inserting a phi-node at
67ad2c66ecSSebastian Neubauer /// the start of bb.loop that is undef when coming from bb.loop, the register
68ad2c66ecSSebastian Neubauer /// allocation knows that the value of %a does not need to be preserved through
69ad2c66ecSSebastian Neubauer /// iterations of the loop.
70ad2c66ecSSebastian Neubauer ///
71208332deSRuiling Song //
72208332deSRuiling Song //===----------------------------------------------------------------------===//
73208332deSRuiling Song
74208332deSRuiling Song #include "AMDGPU.h"
75208332deSRuiling Song #include "GCNSubtarget.h"
76208332deSRuiling Song #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77208332deSRuiling Song #include "SIMachineFunctionInfo.h"
78208332deSRuiling Song #include "llvm/CodeGen/LiveVariables.h"
79208332deSRuiling Song #include "llvm/CodeGen/MachineDominators.h"
80208332deSRuiling Song #include "llvm/CodeGen/MachineLoopInfo.h"
81208332deSRuiling Song #include "llvm/CodeGen/TargetRegisterInfo.h"
82208332deSRuiling Song #include "llvm/InitializePasses.h"
83208332deSRuiling Song
84208332deSRuiling Song using namespace llvm;
85208332deSRuiling Song
86208332deSRuiling Song #define DEBUG_TYPE "si-opt-vgpr-liverange"
87208332deSRuiling Song
88208332deSRuiling Song namespace {
89208332deSRuiling Song
90208332deSRuiling Song class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91208332deSRuiling Song private:
92208332deSRuiling Song const SIRegisterInfo *TRI = nullptr;
93208332deSRuiling Song const SIInstrInfo *TII = nullptr;
94208332deSRuiling Song LiveVariables *LV = nullptr;
95208332deSRuiling Song MachineDominatorTree *MDT = nullptr;
96208332deSRuiling Song const MachineLoopInfo *Loops = nullptr;
97208332deSRuiling Song MachineRegisterInfo *MRI = nullptr;
98208332deSRuiling Song
99208332deSRuiling Song public:
100208332deSRuiling Song static char ID;
101208332deSRuiling Song
102208332deSRuiling Song MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103208332deSRuiling Song
104208332deSRuiling Song void collectElseRegionBlocks(MachineBasicBlock *Flow,
105208332deSRuiling Song MachineBasicBlock *Endif,
106208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &) const;
107208332deSRuiling Song
108208332deSRuiling Song void
109208332deSRuiling Song collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110208332deSRuiling Song MachineBasicBlock *Endif,
111208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112208332deSRuiling Song SmallVectorImpl<Register> &CandidateRegs) const;
113208332deSRuiling Song
114ad2c66ecSSebastian Neubauer void collectWaterfallCandidateRegisters(
1151f52d02cSCarl Ritson MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
1161f52d02cSCarl Ritson SmallSetVector<Register, 16> &CandidateRegs,
1171f52d02cSCarl Ritson SmallSetVector<MachineBasicBlock *, 2> &Blocks,
1181f52d02cSCarl Ritson SmallVectorImpl<MachineInstr *> &Instructions) const;
119ad2c66ecSSebastian Neubauer
120208332deSRuiling Song void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121208332deSRuiling Song SmallVectorImpl<MachineInstr *> &Uses) const;
122208332deSRuiling Song
123208332deSRuiling Song void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124208332deSRuiling Song MachineBasicBlock *Flow) const;
125208332deSRuiling Song
126208332deSRuiling Song void updateLiveRangeInElseRegion(
127208332deSRuiling Song Register Reg, Register NewReg, MachineBasicBlock *Flow,
128208332deSRuiling Song MachineBasicBlock *Endif,
129208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130208332deSRuiling Song
131208332deSRuiling Song void
132208332deSRuiling Song optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133208332deSRuiling Song MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135208332deSRuiling Song
1361f52d02cSCarl Ritson void optimizeWaterfallLiveRange(
1371f52d02cSCarl Ritson Register Reg, MachineBasicBlock *LoopHeader,
1381f52d02cSCarl Ritson SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
1391f52d02cSCarl Ritson SmallVectorImpl<MachineInstr *> &Instructions) const;
140ad2c66ecSSebastian Neubauer
SIOptimizeVGPRLiveRange()141208332deSRuiling Song SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142208332deSRuiling Song
143208332deSRuiling Song bool runOnMachineFunction(MachineFunction &MF) override;
144208332deSRuiling Song
getPassName() const145208332deSRuiling Song StringRef getPassName() const override {
146208332deSRuiling Song return "SI Optimize VGPR LiveRange";
147208332deSRuiling Song }
148208332deSRuiling Song
getAnalysisUsage(AnalysisUsage & AU) const149208332deSRuiling Song void getAnalysisUsage(AnalysisUsage &AU) const override {
150208332deSRuiling Song AU.addRequired<LiveVariables>();
151208332deSRuiling Song AU.addRequired<MachineDominatorTree>();
152208332deSRuiling Song AU.addRequired<MachineLoopInfo>();
153208332deSRuiling Song AU.addPreserved<LiveVariables>();
154208332deSRuiling Song AU.addPreserved<MachineDominatorTree>();
155208332deSRuiling Song AU.addPreserved<MachineLoopInfo>();
156208332deSRuiling Song MachineFunctionPass::getAnalysisUsage(AU);
157208332deSRuiling Song }
158208332deSRuiling Song
getRequiredProperties() const159208332deSRuiling Song MachineFunctionProperties getRequiredProperties() const override {
160208332deSRuiling Song return MachineFunctionProperties().set(
161208332deSRuiling Song MachineFunctionProperties::Property::IsSSA);
162208332deSRuiling Song }
1634fc18de3SMatt Arsenault
getClearedProperties() const1644fc18de3SMatt Arsenault MachineFunctionProperties getClearedProperties() const override {
1654fc18de3SMatt Arsenault return MachineFunctionProperties().set(
1664fc18de3SMatt Arsenault MachineFunctionProperties::Property::NoPHIs);
1674fc18de3SMatt Arsenault }
168208332deSRuiling Song };
169208332deSRuiling Song
170208332deSRuiling Song } // end anonymous namespace
171208332deSRuiling Song
172208332deSRuiling Song // Check whether the MBB is a else flow block and get the branching target which
173208332deSRuiling Song // is the Endif block
174208332deSRuiling Song MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175208332deSRuiling Song SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176208332deSRuiling Song for (auto &BR : MBB->terminators()) {
177208332deSRuiling Song if (BR.getOpcode() == AMDGPU::SI_ELSE)
178208332deSRuiling Song return BR.getOperand(2).getMBB();
179208332deSRuiling Song }
180208332deSRuiling Song return nullptr;
181208332deSRuiling Song }
182208332deSRuiling Song
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183208332deSRuiling Song void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184208332deSRuiling Song MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186208332deSRuiling Song assert(Flow != Endif);
187208332deSRuiling Song
188208332deSRuiling Song MachineBasicBlock *MBB = Endif;
189208332deSRuiling Song unsigned Cur = 0;
190208332deSRuiling Song while (MBB) {
191208332deSRuiling Song for (auto *Pred : MBB->predecessors()) {
192208332deSRuiling Song if (Pred != Flow && !Blocks.contains(Pred))
193208332deSRuiling Song Blocks.insert(Pred);
194208332deSRuiling Song }
195208332deSRuiling Song
196208332deSRuiling Song if (Cur < Blocks.size())
197208332deSRuiling Song MBB = Blocks[Cur++];
198208332deSRuiling Song else
199208332deSRuiling Song MBB = nullptr;
200208332deSRuiling Song }
201208332deSRuiling Song
202b650778dSJordan Rupprecht LLVM_DEBUG({
203b650778dSJordan Rupprecht dbgs() << "Found Else blocks: ";
204208332deSRuiling Song for (auto *MBB : Blocks)
205b650778dSJordan Rupprecht dbgs() << printMBBReference(*MBB) << ' ';
206b650778dSJordan Rupprecht dbgs() << '\n';
207b650778dSJordan Rupprecht });
208208332deSRuiling Song }
209208332deSRuiling Song
210208332deSRuiling Song /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211208332deSRuiling Song void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212208332deSRuiling Song Register Reg, MachineBasicBlock *MBB,
213208332deSRuiling Song SmallVectorImpl<MachineInstr *> &Uses) const {
214208332deSRuiling Song for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215208332deSRuiling Song if (UseMI.getParent() == MBB && !UseMI.isPHI())
216208332deSRuiling Song Uses.push_back(&UseMI);
217208332deSRuiling Song }
218208332deSRuiling Song }
219208332deSRuiling Song
220208332deSRuiling Song /// Collect the killed registers in the ELSE region which are not alive through
221208332deSRuiling Song /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222208332deSRuiling Song void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223208332deSRuiling Song MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225208332deSRuiling Song SmallVectorImpl<Register> &CandidateRegs) const {
226208332deSRuiling Song
227208332deSRuiling Song SmallSet<Register, 8> KillsInElse;
228208332deSRuiling Song
229208332deSRuiling Song for (auto *Else : ElseBlocks) {
230208332deSRuiling Song for (auto &MI : Else->instrs()) {
231208332deSRuiling Song if (MI.isDebugInstr())
232208332deSRuiling Song continue;
233208332deSRuiling Song
234208332deSRuiling Song for (auto &MO : MI.operands()) {
235208332deSRuiling Song if (!MO.isReg() || !MO.getReg() || MO.isDef())
236208332deSRuiling Song continue;
237208332deSRuiling Song
238208332deSRuiling Song Register MOReg = MO.getReg();
239208332deSRuiling Song // We can only optimize AGPR/VGPR virtual register
240208332deSRuiling Song if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241208332deSRuiling Song continue;
242208332deSRuiling Song
243b642d01fSSebastian Neubauer if (MO.readsReg()) {
244208332deSRuiling Song LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245208332deSRuiling Song const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246208332deSRuiling Song // Make sure two conditions are met:
247208332deSRuiling Song // a.) the value is defined before/in the IF block
248208332deSRuiling Song // b.) should be defined in the same loop-level.
249208332deSRuiling Song if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250b642d01fSSebastian Neubauer Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251b642d01fSSebastian Neubauer // Check if the register is live into the endif block. If not,
252b642d01fSSebastian Neubauer // consider it killed in the else region.
253b642d01fSSebastian Neubauer LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254b642d01fSSebastian Neubauer if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255208332deSRuiling Song KillsInElse.insert(MOReg);
256b642d01fSSebastian Neubauer } else {
257b642d01fSSebastian Neubauer LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258b642d01fSSebastian Neubauer << " as Live in Endif\n");
259b642d01fSSebastian Neubauer }
260b642d01fSSebastian Neubauer }
261208332deSRuiling Song }
262208332deSRuiling Song }
263208332deSRuiling Song }
264208332deSRuiling Song }
265208332deSRuiling Song
266208332deSRuiling Song // Check the phis in the Endif, looking for value coming from the ELSE
267208332deSRuiling Song // region. Make sure the phi-use is the last use.
268208332deSRuiling Song for (auto &MI : Endif->phis()) {
269208332deSRuiling Song for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270208332deSRuiling Song auto &MO = MI.getOperand(Idx);
271208332deSRuiling Song auto *Pred = MI.getOperand(Idx + 1).getMBB();
272208332deSRuiling Song if (Pred == Flow)
273208332deSRuiling Song continue;
274208332deSRuiling Song assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275208332deSRuiling Song
276208332deSRuiling Song if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277208332deSRuiling Song continue;
278208332deSRuiling Song
279208332deSRuiling Song Register Reg = MO.getReg();
280208332deSRuiling Song if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281208332deSRuiling Song continue;
282208332deSRuiling Song
283208332deSRuiling Song LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284208332deSRuiling Song
285208332deSRuiling Song if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286208332deSRuiling Song LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287208332deSRuiling Song << " as Live in Endif\n");
288208332deSRuiling Song continue;
289208332deSRuiling Song }
290208332deSRuiling Song // Make sure two conditions are met:
291208332deSRuiling Song // a.) the value is defined before/in the IF block
292208332deSRuiling Song // b.) should be defined in the same loop-level.
293208332deSRuiling Song const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294208332deSRuiling Song if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295208332deSRuiling Song Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296208332deSRuiling Song KillsInElse.insert(Reg);
297208332deSRuiling Song }
298208332deSRuiling Song }
299208332deSRuiling Song
300208332deSRuiling Song auto IsLiveThroughThen = [&](Register Reg) {
301208332deSRuiling Song for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302208332deSRuiling Song ++I) {
303208332deSRuiling Song if (!I->readsReg())
304208332deSRuiling Song continue;
305208332deSRuiling Song auto *UseMI = I->getParent();
306208332deSRuiling Song auto *UseMBB = UseMI->getParent();
307208332deSRuiling Song if (UseMBB == Flow || UseMBB == Endif) {
308208332deSRuiling Song if (!UseMI->isPHI())
309208332deSRuiling Song return true;
310208332deSRuiling Song
311208332deSRuiling Song auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312208332deSRuiling Song // The register is live through the path If->Flow or Flow->Endif.
313208332deSRuiling Song // we should not optimize for such cases.
314208332deSRuiling Song if ((UseMBB == Flow && IncomingMBB != If) ||
315208332deSRuiling Song (UseMBB == Endif && IncomingMBB == Flow))
316208332deSRuiling Song return true;
317208332deSRuiling Song }
318208332deSRuiling Song }
319208332deSRuiling Song return false;
320208332deSRuiling Song };
321208332deSRuiling Song
322208332deSRuiling Song for (auto Reg : KillsInElse) {
323208332deSRuiling Song if (!IsLiveThroughThen(Reg))
324208332deSRuiling Song CandidateRegs.push_back(Reg);
325208332deSRuiling Song }
326208332deSRuiling Song }
327208332deSRuiling Song
328ad2c66ecSSebastian Neubauer /// Collect the registers used in the waterfall loop block that are defined
329ad2c66ecSSebastian Neubauer /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330ad2c66ecSSebastian Neubauer void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
3311f52d02cSCarl Ritson MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
3321f52d02cSCarl Ritson SmallSetVector<Register, 16> &CandidateRegs,
3331f52d02cSCarl Ritson SmallSetVector<MachineBasicBlock *, 2> &Blocks,
3341f52d02cSCarl Ritson SmallVectorImpl<MachineInstr *> &Instructions) const {
335ad2c66ecSSebastian Neubauer
3361f52d02cSCarl Ritson // Collect loop instructions, potentially spanning multiple blocks
3371f52d02cSCarl Ritson auto *MBB = LoopHeader;
3381f52d02cSCarl Ritson for (;;) {
3391f52d02cSCarl Ritson Blocks.insert(MBB);
3401f52d02cSCarl Ritson for (auto &MI : *MBB) {
341ad2c66ecSSebastian Neubauer if (MI.isDebugInstr())
342ad2c66ecSSebastian Neubauer continue;
3431f52d02cSCarl Ritson Instructions.push_back(&MI);
3441f52d02cSCarl Ritson }
3451f52d02cSCarl Ritson if (MBB == LoopEnd)
3461f52d02cSCarl Ritson break;
3472bca7d85SCarl Ritson
3482bca7d85SCarl Ritson if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
3492bca7d85SCarl Ritson (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
3502bca7d85SCarl Ritson LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
3512bca7d85SCarl Ritson return;
3522bca7d85SCarl Ritson }
3532bca7d85SCarl Ritson
3541f52d02cSCarl Ritson MBB = *MBB->succ_begin();
3551f52d02cSCarl Ritson }
3561f52d02cSCarl Ritson
3571f52d02cSCarl Ritson for (auto *I : Instructions) {
3581f52d02cSCarl Ritson auto &MI = *I;
359ad2c66ecSSebastian Neubauer
360ad2c66ecSSebastian Neubauer for (auto &MO : MI.operands()) {
361ad2c66ecSSebastian Neubauer if (!MO.isReg() || !MO.getReg() || MO.isDef())
362ad2c66ecSSebastian Neubauer continue;
363ad2c66ecSSebastian Neubauer
364ad2c66ecSSebastian Neubauer Register MOReg = MO.getReg();
365ad2c66ecSSebastian Neubauer // We can only optimize AGPR/VGPR virtual register
366ad2c66ecSSebastian Neubauer if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367ad2c66ecSSebastian Neubauer continue;
368ad2c66ecSSebastian Neubauer
369ad2c66ecSSebastian Neubauer if (MO.readsReg()) {
3701f52d02cSCarl Ritson MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371ad2c66ecSSebastian Neubauer // Make sure the value is defined before the LOOP block
3721f52d02cSCarl Ritson if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373ad2c66ecSSebastian Neubauer // If the variable is used after the loop, the register coalescer will
374ad2c66ecSSebastian Neubauer // merge the newly created register and remove the phi node again.
375ad2c66ecSSebastian Neubauer // Just do nothing in that case.
376ad2c66ecSSebastian Neubauer LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377ad2c66ecSSebastian Neubauer bool IsUsed = false;
3781f52d02cSCarl Ritson for (auto *Succ : LoopEnd->successors()) {
3791f52d02cSCarl Ritson if (!Blocks.contains(Succ) &&
3801f52d02cSCarl Ritson OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381ad2c66ecSSebastian Neubauer IsUsed = true;
382ad2c66ecSSebastian Neubauer break;
383ad2c66ecSSebastian Neubauer }
384ad2c66ecSSebastian Neubauer }
385ad2c66ecSSebastian Neubauer if (!IsUsed) {
386ad2c66ecSSebastian Neubauer LLVM_DEBUG(dbgs() << "Found candidate reg: "
387ad2c66ecSSebastian Neubauer << printReg(MOReg, TRI, 0, MRI) << '\n');
388ad2c66ecSSebastian Neubauer CandidateRegs.insert(MOReg);
389ad2c66ecSSebastian Neubauer } else {
390ad2c66ecSSebastian Neubauer LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391ad2c66ecSSebastian Neubauer << printReg(MOReg, TRI, 0, MRI) << '\n');
392ad2c66ecSSebastian Neubauer }
393ad2c66ecSSebastian Neubauer }
394ad2c66ecSSebastian Neubauer }
395ad2c66ecSSebastian Neubauer }
396ad2c66ecSSebastian Neubauer }
397ad2c66ecSSebastian Neubauer }
398ad2c66ecSSebastian Neubauer
399208332deSRuiling Song // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400208332deSRuiling Song void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401208332deSRuiling Song Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
402e09f98a6SMatt Arsenault SetVector<MachineBasicBlock *> Blocks;
403e09f98a6SMatt Arsenault SmallVector<MachineBasicBlock *> WorkList({If});
404208332deSRuiling Song
405e09f98a6SMatt Arsenault // Collect all successors until we see the flow block, where we should
406e09f98a6SMatt Arsenault // reconverge.
407e09f98a6SMatt Arsenault while (!WorkList.empty()) {
408e09f98a6SMatt Arsenault auto *MBB = WorkList.pop_back_val();
409e09f98a6SMatt Arsenault for (auto *Succ : MBB->successors()) {
410e09f98a6SMatt Arsenault if (Succ != Flow && !Blocks.contains(Succ)) {
411e09f98a6SMatt Arsenault WorkList.push_back(Succ);
412e09f98a6SMatt Arsenault Blocks.insert(Succ);
413208332deSRuiling Song }
414208332deSRuiling Song }
415e09f98a6SMatt Arsenault }
416208332deSRuiling Song
417208332deSRuiling Song LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
418e09f98a6SMatt Arsenault for (MachineBasicBlock *MBB : Blocks) {
419208332deSRuiling Song // Clear Live bit, as we will recalculate afterwards
420208332deSRuiling Song LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421208332deSRuiling Song << '\n');
422208332deSRuiling Song OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423208332deSRuiling Song }
424208332deSRuiling Song
425e09f98a6SMatt Arsenault SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
426e09f98a6SMatt Arsenault
427208332deSRuiling Song // Get the blocks the Reg should be alive through
428208332deSRuiling Song for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429208332deSRuiling Song ++I) {
430208332deSRuiling Song auto *UseMI = I->getParent();
431208332deSRuiling Song if (UseMI->isPHI() && I->readsReg()) {
432e09f98a6SMatt Arsenault if (Blocks.contains(UseMI->getParent()))
433208332deSRuiling Song PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434208332deSRuiling Song }
435208332deSRuiling Song }
436208332deSRuiling Song
437e09f98a6SMatt Arsenault for (MachineBasicBlock *MBB : Blocks) {
438208332deSRuiling Song SmallVector<MachineInstr *> Uses;
439208332deSRuiling Song // PHI instructions has been processed before.
440208332deSRuiling Song findNonPHIUsesInBlock(Reg, MBB, Uses);
441208332deSRuiling Song
442208332deSRuiling Song if (Uses.size() == 1) {
443208332deSRuiling Song LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444208332deSRuiling Song << printMBBReference(*MBB) << '\n');
445208332deSRuiling Song LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446208332deSRuiling Song } else if (Uses.size() > 1) {
447208332deSRuiling Song // Process the instructions in-order
448208332deSRuiling Song LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449208332deSRuiling Song << printMBBReference(*MBB) << '\n');
450208332deSRuiling Song for (MachineInstr &MI : *MBB) {
451208332deSRuiling Song if (llvm::is_contained(Uses, &MI))
452208332deSRuiling Song LV->HandleVirtRegUse(Reg, MBB, MI);
453208332deSRuiling Song }
454208332deSRuiling Song }
455208332deSRuiling Song
456208332deSRuiling Song // Mark Reg alive through the block if this is a PHI incoming block
457208332deSRuiling Song if (PHIIncoming.contains(MBB))
458208332deSRuiling Song LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459208332deSRuiling Song MBB);
460208332deSRuiling Song }
461208332deSRuiling Song
462208332deSRuiling Song // Set the isKilled flag if we get new Kills in the THEN region.
463208332deSRuiling Song for (auto *MI : OldVarInfo.Kills) {
464e09f98a6SMatt Arsenault if (Blocks.contains(MI->getParent()))
465208332deSRuiling Song MI->addRegisterKilled(Reg, TRI);
466208332deSRuiling Song }
467208332deSRuiling Song }
468208332deSRuiling Song
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469208332deSRuiling Song void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470208332deSRuiling Song Register Reg, Register NewReg, MachineBasicBlock *Flow,
471208332deSRuiling Song MachineBasicBlock *Endif,
472208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473208332deSRuiling Song LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474208332deSRuiling Song LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475208332deSRuiling Song
476208332deSRuiling Song // Transfer aliveBlocks from Reg to NewReg
477208332deSRuiling Song for (auto *MBB : ElseBlocks) {
478208332deSRuiling Song unsigned BBNum = MBB->getNumber();
479208332deSRuiling Song if (OldVarInfo.AliveBlocks.test(BBNum)) {
480208332deSRuiling Song NewVarInfo.AliveBlocks.set(BBNum);
4819ced1e44SSebastian Neubauer LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482208332deSRuiling Song << '\n');
483208332deSRuiling Song OldVarInfo.AliveBlocks.reset(BBNum);
484208332deSRuiling Song }
485208332deSRuiling Song }
486208332deSRuiling Song
487208332deSRuiling Song // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488208332deSRuiling Song auto I = OldVarInfo.Kills.begin();
489208332deSRuiling Song while (I != OldVarInfo.Kills.end()) {
490208332deSRuiling Song if (ElseBlocks.contains((*I)->getParent())) {
491208332deSRuiling Song NewVarInfo.Kills.push_back(*I);
492208332deSRuiling Song I = OldVarInfo.Kills.erase(I);
493208332deSRuiling Song } else {
494208332deSRuiling Song ++I;
495208332deSRuiling Song }
496208332deSRuiling Song }
497208332deSRuiling Song }
498208332deSRuiling Song
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499208332deSRuiling Song void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500208332deSRuiling Song Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501208332deSRuiling Song MachineBasicBlock *Endif,
502208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503208332deSRuiling Song // Insert a new PHI, marking the value from the THEN region being
504208332deSRuiling Song // undef.
505208332deSRuiling Song LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506208332deSRuiling Song const auto *RC = MRI->getRegClass(Reg);
507208332deSRuiling Song Register NewReg = MRI->createVirtualRegister(RC);
508208332deSRuiling Song Register UndefReg = MRI->createVirtualRegister(RC);
509208332deSRuiling Song MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510208332deSRuiling Song TII->get(TargetOpcode::PHI), NewReg);
511208332deSRuiling Song for (auto *Pred : Flow->predecessors()) {
512208332deSRuiling Song if (Pred == If)
513208332deSRuiling Song PHI.addReg(Reg).addMBB(Pred);
514208332deSRuiling Song else
515208332deSRuiling Song PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516208332deSRuiling Song }
517208332deSRuiling Song
518208332deSRuiling Song // Replace all uses in the ELSE region or the PHIs in ENDIF block
519ad2c66ecSSebastian Neubauer // Use early increment range because setReg() will update the linked list.
520ad2c66ecSSebastian Neubauer for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521208332deSRuiling Song auto *UseMI = O.getParent();
522208332deSRuiling Song auto *UseBlock = UseMI->getParent();
523208332deSRuiling Song // Replace uses in Endif block
524208332deSRuiling Song if (UseBlock == Endif) {
525208332deSRuiling Song assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
526208332deSRuiling Song O.setReg(NewReg);
527208332deSRuiling Song continue;
528208332deSRuiling Song }
529208332deSRuiling Song
530208332deSRuiling Song // Replace uses in Else region
531208332deSRuiling Song if (ElseBlocks.contains(UseBlock))
532208332deSRuiling Song O.setReg(NewReg);
533208332deSRuiling Song }
534208332deSRuiling Song
535208332deSRuiling Song // The optimized Reg is not alive through Flow blocks anymore.
536208332deSRuiling Song LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
537208332deSRuiling Song OldVarInfo.AliveBlocks.reset(Flow->getNumber());
538208332deSRuiling Song
539208332deSRuiling Song updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
540208332deSRuiling Song updateLiveRangeInThenRegion(Reg, If, Flow);
541208332deSRuiling Song }
542208332deSRuiling Song
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const543ad2c66ecSSebastian Neubauer void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
5441f52d02cSCarl Ritson Register Reg, MachineBasicBlock *LoopHeader,
5451f52d02cSCarl Ritson SmallSetVector<MachineBasicBlock *, 2> &Blocks,
5461f52d02cSCarl Ritson SmallVectorImpl<MachineInstr *> &Instructions) const {
547ad2c66ecSSebastian Neubauer // Insert a new PHI, marking the value from the last loop iteration undef.
548ad2c66ecSSebastian Neubauer LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
549ad2c66ecSSebastian Neubauer const auto *RC = MRI->getRegClass(Reg);
550ad2c66ecSSebastian Neubauer Register NewReg = MRI->createVirtualRegister(RC);
551ad2c66ecSSebastian Neubauer Register UndefReg = MRI->createVirtualRegister(RC);
552ad2c66ecSSebastian Neubauer
553ad2c66ecSSebastian Neubauer // Replace all uses in the LOOP region
554ad2c66ecSSebastian Neubauer // Use early increment range because setReg() will update the linked list.
555ad2c66ecSSebastian Neubauer for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
556ad2c66ecSSebastian Neubauer auto *UseMI = O.getParent();
557ad2c66ecSSebastian Neubauer auto *UseBlock = UseMI->getParent();
5581f52d02cSCarl Ritson // Replace uses in Loop blocks
5591f52d02cSCarl Ritson if (Blocks.contains(UseBlock))
560ad2c66ecSSebastian Neubauer O.setReg(NewReg);
561ad2c66ecSSebastian Neubauer }
562ad2c66ecSSebastian Neubauer
5631f52d02cSCarl Ritson MachineInstrBuilder PHI =
5641f52d02cSCarl Ritson BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
565ad2c66ecSSebastian Neubauer TII->get(TargetOpcode::PHI), NewReg);
5661f52d02cSCarl Ritson for (auto *Pred : LoopHeader->predecessors()) {
5671f52d02cSCarl Ritson if (Blocks.contains(Pred))
568ad2c66ecSSebastian Neubauer PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
569ad2c66ecSSebastian Neubauer else
570ad2c66ecSSebastian Neubauer PHI.addReg(Reg).addMBB(Pred);
571ad2c66ecSSebastian Neubauer }
572ad2c66ecSSebastian Neubauer
573ad2c66ecSSebastian Neubauer LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
574ad2c66ecSSebastian Neubauer LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
575ad2c66ecSSebastian Neubauer
5761f52d02cSCarl Ritson // Find last use and mark as kill
5771f52d02cSCarl Ritson MachineInstr *Kill = nullptr;
5781f52d02cSCarl Ritson for (auto *MI : reverse(Instructions)) {
5791f52d02cSCarl Ritson if (MI->readsRegister(NewReg, TRI)) {
5801f52d02cSCarl Ritson MI->addRegisterKilled(NewReg, TRI);
5811f52d02cSCarl Ritson NewVarInfo.Kills.push_back(MI);
5821f52d02cSCarl Ritson Kill = MI;
583ad2c66ecSSebastian Neubauer break;
584ad2c66ecSSebastian Neubauer }
585ad2c66ecSSebastian Neubauer }
5861f52d02cSCarl Ritson assert(Kill && "Failed to find last usage of register in loop");
5871f52d02cSCarl Ritson
5881f52d02cSCarl Ritson MachineBasicBlock *KillBlock = Kill->getParent();
5891f52d02cSCarl Ritson bool PostKillBlock = false;
5901f52d02cSCarl Ritson for (auto *Block : Blocks) {
5911f52d02cSCarl Ritson auto BBNum = Block->getNumber();
5921f52d02cSCarl Ritson
5931f52d02cSCarl Ritson // collectWaterfallCandidateRegisters only collects registers that are dead
5941f52d02cSCarl Ritson // after the loop. So we know that the old reg is no longer live throughout
5951f52d02cSCarl Ritson // the waterfall loop.
5961f52d02cSCarl Ritson OldVarInfo.AliveBlocks.reset(BBNum);
5971f52d02cSCarl Ritson
5981f52d02cSCarl Ritson // The new register is live up to (and including) the block that kills it.
5991f52d02cSCarl Ritson PostKillBlock |= (Block == KillBlock);
6001f52d02cSCarl Ritson if (PostKillBlock) {
6011f52d02cSCarl Ritson NewVarInfo.AliveBlocks.reset(BBNum);
6021f52d02cSCarl Ritson } else if (Block != LoopHeader) {
6031f52d02cSCarl Ritson NewVarInfo.AliveBlocks.set(BBNum);
6041f52d02cSCarl Ritson }
6051f52d02cSCarl Ritson }
606ad2c66ecSSebastian Neubauer }
607ad2c66ecSSebastian Neubauer
608208332deSRuiling Song char SIOptimizeVGPRLiveRange::ID = 0;
609208332deSRuiling Song
610208332deSRuiling Song INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
611208332deSRuiling Song "SI Optimize VGPR LiveRange", false, false)
612208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
613208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
614208332deSRuiling Song INITIALIZE_PASS_DEPENDENCY(LiveVariables)
615208332deSRuiling Song INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
616208332deSRuiling Song "SI Optimize VGPR LiveRange", false, false)
617208332deSRuiling Song
618208332deSRuiling Song char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
619208332deSRuiling Song
createSIOptimizeVGPRLiveRangePass()620208332deSRuiling Song FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
621208332deSRuiling Song return new SIOptimizeVGPRLiveRange();
622208332deSRuiling Song }
623208332deSRuiling Song
runOnMachineFunction(MachineFunction & MF)624208332deSRuiling Song bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
625208332deSRuiling Song
626208332deSRuiling Song const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
627208332deSRuiling Song TII = ST.getInstrInfo();
628208332deSRuiling Song TRI = &TII->getRegisterInfo();
629208332deSRuiling Song MDT = &getAnalysis<MachineDominatorTree>();
630208332deSRuiling Song Loops = &getAnalysis<MachineLoopInfo>();
631208332deSRuiling Song LV = &getAnalysis<LiveVariables>();
632208332deSRuiling Song MRI = &MF.getRegInfo();
633208332deSRuiling Song
634208332deSRuiling Song if (skipFunction(MF.getFunction()))
635208332deSRuiling Song return false;
636208332deSRuiling Song
637208332deSRuiling Song bool MadeChange = false;
638208332deSRuiling Song
639208332deSRuiling Song // TODO: we need to think about the order of visiting the blocks to get
640208332deSRuiling Song // optimal result for nesting if-else cases.
641208332deSRuiling Song for (MachineBasicBlock &MBB : MF) {
642208332deSRuiling Song for (auto &MI : MBB.terminators()) {
643208332deSRuiling Song // Detect the if-else blocks
644208332deSRuiling Song if (MI.getOpcode() == AMDGPU::SI_IF) {
645208332deSRuiling Song MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
646208332deSRuiling Song auto *Endif = getElseTarget(IfTarget);
647208332deSRuiling Song if (!Endif)
648208332deSRuiling Song continue;
649208332deSRuiling Song
650*4dcb42faSRuiling Song // Skip unexpected control flow.
651*4dcb42faSRuiling Song if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
652*4dcb42faSRuiling Song continue;
653*4dcb42faSRuiling Song
654208332deSRuiling Song SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
655208332deSRuiling Song SmallVector<Register> CandidateRegs;
656208332deSRuiling Song
657208332deSRuiling Song LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
658208332deSRuiling Song << printMBBReference(MBB) << ' '
659208332deSRuiling Song << printMBBReference(*IfTarget) << ' '
660208332deSRuiling Song << printMBBReference(*Endif) << '\n');
661208332deSRuiling Song
662208332deSRuiling Song // Collect all the blocks in the ELSE region
663208332deSRuiling Song collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
664208332deSRuiling Song
665208332deSRuiling Song // Collect the registers can be optimized
666208332deSRuiling Song collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
667208332deSRuiling Song CandidateRegs);
668208332deSRuiling Song MadeChange |= !CandidateRegs.empty();
669208332deSRuiling Song // Now we are safe to optimize.
670208332deSRuiling Song for (auto Reg : CandidateRegs)
671208332deSRuiling Song optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
672ad2c66ecSSebastian Neubauer } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
6731f52d02cSCarl Ritson auto *LoopHeader = MI.getOperand(0).getMBB();
6741f52d02cSCarl Ritson auto *LoopEnd = &MBB;
6751f52d02cSCarl Ritson
676ad2c66ecSSebastian Neubauer LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
6771f52d02cSCarl Ritson << printMBBReference(*LoopHeader) << '\n');
678ad2c66ecSSebastian Neubauer
679ad2c66ecSSebastian Neubauer SmallSetVector<Register, 16> CandidateRegs;
6801f52d02cSCarl Ritson SmallVector<MachineInstr *, 16> Instructions;
6811f52d02cSCarl Ritson SmallSetVector<MachineBasicBlock *, 2> Blocks;
6821f52d02cSCarl Ritson
6831f52d02cSCarl Ritson collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
6841f52d02cSCarl Ritson Blocks, Instructions);
685ad2c66ecSSebastian Neubauer MadeChange |= !CandidateRegs.empty();
686ad2c66ecSSebastian Neubauer // Now we are safe to optimize.
687ad2c66ecSSebastian Neubauer for (auto Reg : CandidateRegs)
6881f52d02cSCarl Ritson optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
689208332deSRuiling Song }
690208332deSRuiling Song }
691208332deSRuiling Song }
692208332deSRuiling Song
693208332deSRuiling Song return MadeChange;
694208332deSRuiling Song }
695