//===-- SIFormMemoryClauses.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass extends the live ranges of registers used as pointers in
/// sequences of adjacent SMEM and VMEM instructions if XNACK is enabled. A
/// load that would overwrite a pointer would require breaking the soft clause.
/// Artificially extend the live ranges of the pointer operands by adding
/// implicit-def early-clobber operands throughout the soft clause.
///
//===----------------------------------------------------------------------===//
16739174c4SStanislav Mekhanoshin 
17739174c4SStanislav Mekhanoshin #include "AMDGPU.h"
18739174c4SStanislav Mekhanoshin #include "GCNRegPressure.h"
19739174c4SStanislav Mekhanoshin #include "SIMachineFunctionInfo.h"
2005da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
21739174c4SStanislav Mekhanoshin 
22739174c4SStanislav Mekhanoshin using namespace llvm;
23739174c4SStanislav Mekhanoshin 
24739174c4SStanislav Mekhanoshin #define DEBUG_TYPE "si-form-memory-clauses"
25739174c4SStanislav Mekhanoshin 
26739174c4SStanislav Mekhanoshin // Clauses longer then 15 instructions would overflow one of the counters
27739174c4SStanislav Mekhanoshin // and stall. They can stall even earlier if there are outstanding counters.
28739174c4SStanislav Mekhanoshin static cl::opt<unsigned>
29739174c4SStanislav Mekhanoshin MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
30739174c4SStanislav Mekhanoshin           cl::desc("Maximum length of a memory clause, instructions"));
31739174c4SStanislav Mekhanoshin 
32739174c4SStanislav Mekhanoshin namespace {
33739174c4SStanislav Mekhanoshin 
34739174c4SStanislav Mekhanoshin class SIFormMemoryClauses : public MachineFunctionPass {
35739174c4SStanislav Mekhanoshin   typedef DenseMap<unsigned, std::pair<unsigned, LaneBitmask>> RegUse;
36739174c4SStanislav Mekhanoshin 
37739174c4SStanislav Mekhanoshin public:
38739174c4SStanislav Mekhanoshin   static char ID;
39739174c4SStanislav Mekhanoshin 
40739174c4SStanislav Mekhanoshin public:
SIFormMemoryClauses()41739174c4SStanislav Mekhanoshin   SIFormMemoryClauses() : MachineFunctionPass(ID) {
42739174c4SStanislav Mekhanoshin     initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
43739174c4SStanislav Mekhanoshin   }
44739174c4SStanislav Mekhanoshin 
45739174c4SStanislav Mekhanoshin   bool runOnMachineFunction(MachineFunction &MF) override;
46739174c4SStanislav Mekhanoshin 
getPassName() const47739174c4SStanislav Mekhanoshin   StringRef getPassName() const override {
48739174c4SStanislav Mekhanoshin     return "SI Form memory clauses";
49739174c4SStanislav Mekhanoshin   }
50739174c4SStanislav Mekhanoshin 
getAnalysisUsage(AnalysisUsage & AU) const51739174c4SStanislav Mekhanoshin   void getAnalysisUsage(AnalysisUsage &AU) const override {
52739174c4SStanislav Mekhanoshin     AU.addRequired<LiveIntervals>();
53739174c4SStanislav Mekhanoshin     AU.setPreservesAll();
54739174c4SStanislav Mekhanoshin     MachineFunctionPass::getAnalysisUsage(AU);
55739174c4SStanislav Mekhanoshin   }
56739174c4SStanislav Mekhanoshin 
getClearedProperties() const57551a69e4SMatt Arsenault   MachineFunctionProperties getClearedProperties() const override {
58551a69e4SMatt Arsenault     return MachineFunctionProperties().set(
59551a69e4SMatt Arsenault         MachineFunctionProperties::Property::IsSSA);
60551a69e4SMatt Arsenault   }
61551a69e4SMatt Arsenault 
62739174c4SStanislav Mekhanoshin private:
638f14a088SMatt Arsenault   bool canBundle(const MachineInstr &MI, const RegUse &Defs,
648f14a088SMatt Arsenault                  const RegUse &Uses) const;
65477e3fe4SMatt Arsenault   bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
66739174c4SStanislav Mekhanoshin   void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
67739174c4SStanislav Mekhanoshin   bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
68477e3fe4SMatt Arsenault                       GCNDownwardRPTracker &RPT);
69739174c4SStanislav Mekhanoshin 
705bfbae5cSTom Stellard   const GCNSubtarget *ST;
71739174c4SStanislav Mekhanoshin   const SIRegisterInfo *TRI;
72739174c4SStanislav Mekhanoshin   const MachineRegisterInfo *MRI;
73739174c4SStanislav Mekhanoshin   SIMachineFunctionInfo *MFI;
74739174c4SStanislav Mekhanoshin 
75739174c4SStanislav Mekhanoshin   unsigned LastRecordedOccupancy;
76739174c4SStanislav Mekhanoshin   unsigned MaxVGPRs;
77739174c4SStanislav Mekhanoshin   unsigned MaxSGPRs;
78739174c4SStanislav Mekhanoshin };
79739174c4SStanislav Mekhanoshin 
80739174c4SStanislav Mekhanoshin } // End anonymous namespace.
81739174c4SStanislav Mekhanoshin 
82739174c4SStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
83739174c4SStanislav Mekhanoshin                       "SI Form memory clauses", false, false)
84739174c4SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
85739174c4SStanislav Mekhanoshin INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
86739174c4SStanislav Mekhanoshin                     "SI Form memory clauses", false, false)
87739174c4SStanislav Mekhanoshin 
88739174c4SStanislav Mekhanoshin 
89739174c4SStanislav Mekhanoshin char SIFormMemoryClauses::ID = 0;
90739174c4SStanislav Mekhanoshin 
91739174c4SStanislav Mekhanoshin char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;
92739174c4SStanislav Mekhanoshin 
createSIFormMemoryClausesPass()93739174c4SStanislav Mekhanoshin FunctionPass *llvm::createSIFormMemoryClausesPass() {
94739174c4SStanislav Mekhanoshin   return new SIFormMemoryClauses();
95739174c4SStanislav Mekhanoshin }
96739174c4SStanislav Mekhanoshin 
isVMEMClauseInst(const MachineInstr & MI)97739174c4SStanislav Mekhanoshin static bool isVMEMClauseInst(const MachineInstr &MI) {
98739174c4SStanislav Mekhanoshin   return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
99739174c4SStanislav Mekhanoshin }
100739174c4SStanislav Mekhanoshin 
isSMEMClauseInst(const MachineInstr & MI)101739174c4SStanislav Mekhanoshin static bool isSMEMClauseInst(const MachineInstr &MI) {
102739174c4SStanislav Mekhanoshin   return SIInstrInfo::isSMRD(MI);
103739174c4SStanislav Mekhanoshin }
104739174c4SStanislav Mekhanoshin 
105739174c4SStanislav Mekhanoshin // There no sense to create store clauses, they do not define anything,
106739174c4SStanislav Mekhanoshin // thus there is nothing to set early-clobber.
isValidClauseInst(const MachineInstr & MI,bool IsVMEMClause)107739174c4SStanislav Mekhanoshin static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
10841877b82SMatt Arsenault   assert(!MI.isDebugInstr() && "debug instructions should not reach here");
10941877b82SMatt Arsenault   if (MI.isBundled())
110739174c4SStanislav Mekhanoshin     return false;
111739174c4SStanislav Mekhanoshin   if (!MI.mayLoad() || MI.mayStore())
112739174c4SStanislav Mekhanoshin     return false;
1135cf9292cSStanislav Mekhanoshin   if (SIInstrInfo::isAtomic(MI))
114739174c4SStanislav Mekhanoshin     return false;
115739174c4SStanislav Mekhanoshin   if (IsVMEMClause && !isVMEMClauseInst(MI))
116739174c4SStanislav Mekhanoshin     return false;
117739174c4SStanislav Mekhanoshin   if (!IsVMEMClause && !isSMEMClauseInst(MI))
118739174c4SStanislav Mekhanoshin     return false;
119f64f8efeSTim Renouf   // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
120f64f8efeSTim Renouf   for (const MachineOperand &ResMO : MI.defs()) {
1210c476111SDaniel Sanders     Register ResReg = ResMO.getReg();
122f64f8efeSTim Renouf     for (const MachineOperand &MO : MI.uses()) {
123f64f8efeSTim Renouf       if (!MO.isReg() || MO.isDef())
124f64f8efeSTim Renouf         continue;
125f64f8efeSTim Renouf       if (MO.getReg() == ResReg)
126f64f8efeSTim Renouf         return false;
127f64f8efeSTim Renouf     }
128f64f8efeSTim Renouf     break; // Only check the first def.
129f64f8efeSTim Renouf   }
130739174c4SStanislav Mekhanoshin   return true;
131739174c4SStanislav Mekhanoshin }
132739174c4SStanislav Mekhanoshin 
getMopState(const MachineOperand & MO)133739174c4SStanislav Mekhanoshin static unsigned getMopState(const MachineOperand &MO) {
134739174c4SStanislav Mekhanoshin   unsigned S = 0;
135739174c4SStanislav Mekhanoshin   if (MO.isImplicit())
136739174c4SStanislav Mekhanoshin     S |= RegState::Implicit;
137739174c4SStanislav Mekhanoshin   if (MO.isDead())
138739174c4SStanislav Mekhanoshin     S |= RegState::Dead;
139739174c4SStanislav Mekhanoshin   if (MO.isUndef())
140739174c4SStanislav Mekhanoshin     S |= RegState::Undef;
141739174c4SStanislav Mekhanoshin   if (MO.isKill())
142739174c4SStanislav Mekhanoshin     S |= RegState::Kill;
143739174c4SStanislav Mekhanoshin   if (MO.isEarlyClobber())
144739174c4SStanislav Mekhanoshin     S |= RegState::EarlyClobber;
14534978602SJay Foad   if (MO.getReg().isPhysical() && MO.isRenamable())
146739174c4SStanislav Mekhanoshin     S |= RegState::Renamable;
147739174c4SStanislav Mekhanoshin   return S;
148739174c4SStanislav Mekhanoshin }
149739174c4SStanislav Mekhanoshin 
150739174c4SStanislav Mekhanoshin // Returns false if there is a use of a def already in the map.
151739174c4SStanislav Mekhanoshin // In this case we must break the clause.
canBundle(const MachineInstr & MI,const RegUse & Defs,const RegUse & Uses) const1528f14a088SMatt Arsenault bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
1538f14a088SMatt Arsenault                                     const RegUse &Uses) const {
154739174c4SStanislav Mekhanoshin   // Check interference with defs.
155739174c4SStanislav Mekhanoshin   for (const MachineOperand &MO : MI.operands()) {
156739174c4SStanislav Mekhanoshin     // TODO: Prologue/Epilogue Insertion pass does not process bundled
157739174c4SStanislav Mekhanoshin     //       instructions.
158739174c4SStanislav Mekhanoshin     if (MO.isFI())
159739174c4SStanislav Mekhanoshin       return false;
160739174c4SStanislav Mekhanoshin 
161739174c4SStanislav Mekhanoshin     if (!MO.isReg())
162739174c4SStanislav Mekhanoshin       continue;
163739174c4SStanislav Mekhanoshin 
1640c476111SDaniel Sanders     Register Reg = MO.getReg();
165739174c4SStanislav Mekhanoshin 
166739174c4SStanislav Mekhanoshin     // If it is tied we will need to write same register as we read.
167739174c4SStanislav Mekhanoshin     if (MO.isTied())
168739174c4SStanislav Mekhanoshin       return false;
169739174c4SStanislav Mekhanoshin 
1708f14a088SMatt Arsenault     const RegUse &Map = MO.isDef() ? Uses : Defs;
171739174c4SStanislav Mekhanoshin     auto Conflict = Map.find(Reg);
172739174c4SStanislav Mekhanoshin     if (Conflict == Map.end())
173739174c4SStanislav Mekhanoshin       continue;
174739174c4SStanislav Mekhanoshin 
17534978602SJay Foad     if (Reg.isPhysical())
176739174c4SStanislav Mekhanoshin       return false;
177739174c4SStanislav Mekhanoshin 
178739174c4SStanislav Mekhanoshin     LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
179739174c4SStanislav Mekhanoshin     if ((Conflict->second.second & Mask).any())
180739174c4SStanislav Mekhanoshin       return false;
181739174c4SStanislav Mekhanoshin   }
182739174c4SStanislav Mekhanoshin 
183739174c4SStanislav Mekhanoshin   return true;
184739174c4SStanislav Mekhanoshin }
185739174c4SStanislav Mekhanoshin 
186739174c4SStanislav Mekhanoshin // Since all defs in the clause are early clobber we can run out of registers.
187739174c4SStanislav Mekhanoshin // Function returns false if pressure would hit the limit if instruction is
188739174c4SStanislav Mekhanoshin // bundled into a memory clause.
checkPressure(const MachineInstr & MI,GCNDownwardRPTracker & RPT)189739174c4SStanislav Mekhanoshin bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
190477e3fe4SMatt Arsenault                                         GCNDownwardRPTracker &RPT) {
191739174c4SStanislav Mekhanoshin   // NB: skip advanceBeforeNext() call. Since all defs will be marked
192739174c4SStanislav Mekhanoshin   // early-clobber they will all stay alive at least to the end of the
193477e3fe4SMatt Arsenault   // clause. Therefor we should not decrease pressure even if load
194477e3fe4SMatt Arsenault   // pointer becomes dead and could otherwise be reused for destination.
195739174c4SStanislav Mekhanoshin   RPT.advanceToNext();
196477e3fe4SMatt Arsenault   GCNRegPressure MaxPressure = RPT.moveMaxPressure();
197477e3fe4SMatt Arsenault   unsigned Occupancy = MaxPressure.getOccupancy(*ST);
198e3c6fa36SMatt Arsenault 
199e3c6fa36SMatt Arsenault   // Don't push over half the register budget. We don't want to introduce
200e3c6fa36SMatt Arsenault   // spilling just to form a soft clause.
201e3c6fa36SMatt Arsenault   //
202e3c6fa36SMatt Arsenault   // FIXME: This pressure check is fundamentally broken. First, this is checking
203e3c6fa36SMatt Arsenault   // the global pressure, not the pressure at this specific point in the
204e3c6fa36SMatt Arsenault   // program. Second, it's not accounting for the increased liveness of the use
205e3c6fa36SMatt Arsenault   // operands due to the early clobber we will introduce. Third, the pressure
206e3c6fa36SMatt Arsenault   // tracking does not account for the alignment requirements for SGPRs, or the
207e3c6fa36SMatt Arsenault   // fragmentation of registers the allocator will need to satisfy.
208477e3fe4SMatt Arsenault   if (Occupancy >= MFI->getMinAllowedOccupancy() &&
209a8d9d507SStanislav Mekhanoshin       MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 &&
210e3c6fa36SMatt Arsenault       MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
211477e3fe4SMatt Arsenault     LastRecordedOccupancy = Occupancy;
2121e377a27SMatt Arsenault     return true;
213739174c4SStanislav Mekhanoshin   }
214477e3fe4SMatt Arsenault   return false;
215477e3fe4SMatt Arsenault }
216739174c4SStanislav Mekhanoshin 
217739174c4SStanislav Mekhanoshin // Collect register defs and uses along with their lane masks and states.
collectRegUses(const MachineInstr & MI,RegUse & Defs,RegUse & Uses) const218739174c4SStanislav Mekhanoshin void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
219739174c4SStanislav Mekhanoshin                                          RegUse &Defs, RegUse &Uses) const {
220739174c4SStanislav Mekhanoshin   for (const MachineOperand &MO : MI.operands()) {
221739174c4SStanislav Mekhanoshin     if (!MO.isReg())
222739174c4SStanislav Mekhanoshin       continue;
2230c476111SDaniel Sanders     Register Reg = MO.getReg();
224739174c4SStanislav Mekhanoshin     if (!Reg)
225739174c4SStanislav Mekhanoshin       continue;
226739174c4SStanislav Mekhanoshin 
22734978602SJay Foad     LaneBitmask Mask = Reg.isVirtual()
2282bea69bfSDaniel Sanders                            ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
2292bea69bfSDaniel Sanders                            : LaneBitmask::getAll();
230739174c4SStanislav Mekhanoshin     RegUse &Map = MO.isDef() ? Defs : Uses;
231739174c4SStanislav Mekhanoshin 
232739174c4SStanislav Mekhanoshin     auto Loc = Map.find(Reg);
233739174c4SStanislav Mekhanoshin     unsigned State = getMopState(MO);
234739174c4SStanislav Mekhanoshin     if (Loc == Map.end()) {
235739174c4SStanislav Mekhanoshin       Map[Reg] = std::make_pair(State, Mask);
236739174c4SStanislav Mekhanoshin     } else {
237739174c4SStanislav Mekhanoshin       Loc->second.first |= State;
238739174c4SStanislav Mekhanoshin       Loc->second.second |= Mask;
239739174c4SStanislav Mekhanoshin     }
240739174c4SStanislav Mekhanoshin   }
241739174c4SStanislav Mekhanoshin }
242739174c4SStanislav Mekhanoshin 
243739174c4SStanislav Mekhanoshin // Check register def/use conflicts, occupancy limits and collect def/use maps.
244*6527b2a4SSebastian Neubauer // Return true if instruction can be bundled with previous. If it cannot
245739174c4SStanislav Mekhanoshin // def/use maps are not updated.
processRegUses(const MachineInstr & MI,RegUse & Defs,RegUse & Uses,GCNDownwardRPTracker & RPT)246477e3fe4SMatt Arsenault bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
247477e3fe4SMatt Arsenault                                          RegUse &Defs, RegUse &Uses,
248477e3fe4SMatt Arsenault                                          GCNDownwardRPTracker &RPT) {
249739174c4SStanislav Mekhanoshin   if (!canBundle(MI, Defs, Uses))
250739174c4SStanislav Mekhanoshin     return false;
251739174c4SStanislav Mekhanoshin 
252477e3fe4SMatt Arsenault   if (!checkPressure(MI, RPT))
253739174c4SStanislav Mekhanoshin     return false;
254739174c4SStanislav Mekhanoshin 
255739174c4SStanislav Mekhanoshin   collectRegUses(MI, Defs, Uses);
256739174c4SStanislav Mekhanoshin   return true;
257739174c4SStanislav Mekhanoshin }
258739174c4SStanislav Mekhanoshin 
runOnMachineFunction(MachineFunction & MF)259739174c4SStanislav Mekhanoshin bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
260739174c4SStanislav Mekhanoshin   if (skipFunction(MF.getFunction()))
261739174c4SStanislav Mekhanoshin     return false;
262739174c4SStanislav Mekhanoshin 
2635bfbae5cSTom Stellard   ST = &MF.getSubtarget<GCNSubtarget>();
264739174c4SStanislav Mekhanoshin   if (!ST->isXNACKEnabled())
265739174c4SStanislav Mekhanoshin     return false;
266739174c4SStanislav Mekhanoshin 
267739174c4SStanislav Mekhanoshin   const SIInstrInfo *TII = ST->getInstrInfo();
268739174c4SStanislav Mekhanoshin   TRI = ST->getRegisterInfo();
269739174c4SStanislav Mekhanoshin   MRI = &MF.getRegInfo();
270739174c4SStanislav Mekhanoshin   MFI = MF.getInfo<SIMachineFunctionInfo>();
271739174c4SStanislav Mekhanoshin   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
272739174c4SStanislav Mekhanoshin   SlotIndexes *Ind = LIS->getSlotIndexes();
273739174c4SStanislav Mekhanoshin   bool Changed = false;
274739174c4SStanislav Mekhanoshin 
275739174c4SStanislav Mekhanoshin   MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
276739174c4SStanislav Mekhanoshin   MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
2777fecdf36STim Renouf   unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
2787fecdf36STim Renouf       MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
279739174c4SStanislav Mekhanoshin 
280739174c4SStanislav Mekhanoshin   for (MachineBasicBlock &MBB : MF) {
2815b648df1SChangpeng Fang     GCNDownwardRPTracker RPT(*LIS);
282739174c4SStanislav Mekhanoshin     MachineBasicBlock::instr_iterator Next;
283739174c4SStanislav Mekhanoshin     for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
284739174c4SStanislav Mekhanoshin       MachineInstr &MI = *I;
285739174c4SStanislav Mekhanoshin       Next = std::next(I);
286739174c4SStanislav Mekhanoshin 
28781b2c23bSMatt Arsenault       if (MI.isMetaInstruction())
28841877b82SMatt Arsenault         continue;
28941877b82SMatt Arsenault 
290739174c4SStanislav Mekhanoshin       bool IsVMEM = isVMEMClauseInst(MI);
291739174c4SStanislav Mekhanoshin 
292739174c4SStanislav Mekhanoshin       if (!isValidClauseInst(MI, IsVMEM))
293739174c4SStanislav Mekhanoshin         continue;
294739174c4SStanislav Mekhanoshin 
2955b648df1SChangpeng Fang       if (!RPT.getNext().isValid())
296739174c4SStanislav Mekhanoshin         RPT.reset(MI);
2975b648df1SChangpeng Fang       else { // Advance the state to the current MI.
2985b648df1SChangpeng Fang         RPT.advance(MachineBasicBlock::const_iterator(MI));
2995b648df1SChangpeng Fang         RPT.advanceBeforeNext();
3005b648df1SChangpeng Fang       }
301739174c4SStanislav Mekhanoshin 
3025b648df1SChangpeng Fang       const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
3035b648df1SChangpeng Fang       RegUse Defs, Uses;
304477e3fe4SMatt Arsenault       if (!processRegUses(MI, Defs, Uses, RPT)) {
3055b648df1SChangpeng Fang         RPT.reset(MI, &LiveRegsCopy);
306739174c4SStanislav Mekhanoshin         continue;
3075b648df1SChangpeng Fang       }
308739174c4SStanislav Mekhanoshin 
30981b2c23bSMatt Arsenault       MachineBasicBlock::iterator LastClauseInst = Next;
310739174c4SStanislav Mekhanoshin       unsigned Length = 1;
3117fecdf36STim Renouf       for ( ; Next != E && Length < FuncMaxClause; ++Next) {
31281b2c23bSMatt Arsenault         // Debug instructions should not change the kill insertion.
31381b2c23bSMatt Arsenault         if (Next->isMetaInstruction())
31441877b82SMatt Arsenault           continue;
31541877b82SMatt Arsenault 
316739174c4SStanislav Mekhanoshin         if (!isValidClauseInst(*Next, IsVMEM))
317739174c4SStanislav Mekhanoshin           break;
318739174c4SStanislav Mekhanoshin 
319739174c4SStanislav Mekhanoshin         // A load from pointer which was loaded inside the same bundle is an
320739174c4SStanislav Mekhanoshin         // impossible clause because we will need to write and read the same
321739174c4SStanislav Mekhanoshin         // register inside. In this case processRegUses will return false.
322477e3fe4SMatt Arsenault         if (!processRegUses(*Next, Defs, Uses, RPT))
323739174c4SStanislav Mekhanoshin           break;
324739174c4SStanislav Mekhanoshin 
32581b2c23bSMatt Arsenault         LastClauseInst = Next;
326739174c4SStanislav Mekhanoshin         ++Length;
327739174c4SStanislav Mekhanoshin       }
3285b648df1SChangpeng Fang       if (Length < 2) {
3295b648df1SChangpeng Fang         RPT.reset(MI, &LiveRegsCopy);
330739174c4SStanislav Mekhanoshin         continue;
3315b648df1SChangpeng Fang       }
332739174c4SStanislav Mekhanoshin 
333739174c4SStanislav Mekhanoshin       Changed = true;
334739174c4SStanislav Mekhanoshin       MFI->limitOccupancy(LastRecordedOccupancy);
335739174c4SStanislav Mekhanoshin 
33681b2c23bSMatt Arsenault       assert(!LastClauseInst->isMetaInstruction());
337739174c4SStanislav Mekhanoshin 
33881b2c23bSMatt Arsenault       SlotIndex ClauseLiveInIdx = LIS->getInstructionIndex(MI);
33981b2c23bSMatt Arsenault       SlotIndex ClauseLiveOutIdx =
34081b2c23bSMatt Arsenault           LIS->getInstructionIndex(*LastClauseInst).getNextIndex();
3415b648df1SChangpeng Fang 
34281b2c23bSMatt Arsenault       // Track the last inserted kill.
34381b2c23bSMatt Arsenault       MachineInstrBuilder Kill;
34441877b82SMatt Arsenault 
34581b2c23bSMatt Arsenault       // Insert one kill per register, with operands covering all necessary
34681b2c23bSMatt Arsenault       // subregisters.
34781b2c23bSMatt Arsenault       for (auto &&R : Uses) {
34881b2c23bSMatt Arsenault         Register Reg = R.first;
34981b2c23bSMatt Arsenault         if (Reg.isPhysical())
35081b2c23bSMatt Arsenault           continue;
35181b2c23bSMatt Arsenault 
35281b2c23bSMatt Arsenault         // Collect the register operands we should extend the live ranges of.
35381b2c23bSMatt Arsenault         SmallVector<std::tuple<unsigned, unsigned>> KillOps;
35481b2c23bSMatt Arsenault         const LiveInterval &LI = LIS->getInterval(R.first);
35581b2c23bSMatt Arsenault 
35681b2c23bSMatt Arsenault         if (!LI.hasSubRanges()) {
35781b2c23bSMatt Arsenault           if (!LI.liveAt(ClauseLiveOutIdx)) {
35881b2c23bSMatt Arsenault             KillOps.emplace_back(R.second.first | RegState::Kill,
35981b2c23bSMatt Arsenault                                  AMDGPU::NoSubRegister);
36081b2c23bSMatt Arsenault           }
36181b2c23bSMatt Arsenault         } else {
36281b2c23bSMatt Arsenault           LaneBitmask KilledMask;
36381b2c23bSMatt Arsenault           for (const LiveInterval::SubRange &SR : LI.subranges()) {
36481b2c23bSMatt Arsenault             if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx))
36581b2c23bSMatt Arsenault               KilledMask |= SR.LaneMask;
36681b2c23bSMatt Arsenault           }
36781b2c23bSMatt Arsenault 
36881b2c23bSMatt Arsenault           if (KilledMask.none())
36981b2c23bSMatt Arsenault             continue;
37081b2c23bSMatt Arsenault 
37181b2c23bSMatt Arsenault           SmallVector<unsigned> KilledIndexes;
37281b2c23bSMatt Arsenault           bool Success = TRI->getCoveringSubRegIndexes(
37381b2c23bSMatt Arsenault               *MRI, MRI->getRegClass(Reg), KilledMask, KilledIndexes);
37481b2c23bSMatt Arsenault           (void)Success;
37581b2c23bSMatt Arsenault           assert(Success && "Failed to find subregister mask to cover lanes");
37681b2c23bSMatt Arsenault           for (unsigned SubReg : KilledIndexes) {
37781b2c23bSMatt Arsenault             KillOps.emplace_back(R.second.first | RegState::Kill, SubReg);
37881b2c23bSMatt Arsenault           }
37981b2c23bSMatt Arsenault         }
38081b2c23bSMatt Arsenault 
38181b2c23bSMatt Arsenault         if (KillOps.empty())
38281b2c23bSMatt Arsenault           continue;
38381b2c23bSMatt Arsenault 
38481b2c23bSMatt Arsenault         // We only want to extend the live ranges of used registers. If they
38581b2c23bSMatt Arsenault         // already have existing uses beyond the bundle, we don't need the kill.
38681b2c23bSMatt Arsenault         //
38781b2c23bSMatt Arsenault         // It's possible all of the use registers were already live past the
38881b2c23bSMatt Arsenault         // bundle.
38981b2c23bSMatt Arsenault         Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst),
39081b2c23bSMatt Arsenault                        DebugLoc(), TII->get(AMDGPU::KILL));
39181b2c23bSMatt Arsenault         for (auto &Op : KillOps)
39281b2c23bSMatt Arsenault           Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op));
39381b2c23bSMatt Arsenault         Ind->insertMachineInstrInMaps(*Kill);
39481b2c23bSMatt Arsenault       }
39581b2c23bSMatt Arsenault 
39681b2c23bSMatt Arsenault       if (!Kill) {
39781b2c23bSMatt Arsenault         RPT.reset(MI, &LiveRegsCopy);
39841877b82SMatt Arsenault         continue;
39941877b82SMatt Arsenault       }
40041877b82SMatt Arsenault 
40181b2c23bSMatt Arsenault       // Restore the state after processing the end of the bundle.
40281b2c23bSMatt Arsenault       RPT.reset(*Kill, &LiveRegsCopy);
403739174c4SStanislav Mekhanoshin 
404739174c4SStanislav Mekhanoshin       for (auto &&R : Defs) {
40534978602SJay Foad         Register Reg = R.first;
406739174c4SStanislav Mekhanoshin         Uses.erase(Reg);
40734978602SJay Foad         if (Reg.isPhysical())
408739174c4SStanislav Mekhanoshin           continue;
409739174c4SStanislav Mekhanoshin         LIS->removeInterval(Reg);
410739174c4SStanislav Mekhanoshin         LIS->createAndComputeVirtRegInterval(Reg);
411739174c4SStanislav Mekhanoshin       }
412739174c4SStanislav Mekhanoshin 
413739174c4SStanislav Mekhanoshin       for (auto &&R : Uses) {
41434978602SJay Foad         Register Reg = R.first;
41534978602SJay Foad         if (Reg.isPhysical())
416739174c4SStanislav Mekhanoshin           continue;
417739174c4SStanislav Mekhanoshin         LIS->removeInterval(Reg);
418739174c4SStanislav Mekhanoshin         LIS->createAndComputeVirtRegInterval(Reg);
419739174c4SStanislav Mekhanoshin       }
420739174c4SStanislav Mekhanoshin     }
421739174c4SStanislav Mekhanoshin   }
422739174c4SStanislav Mekhanoshin 
423739174c4SStanislav Mekhanoshin   return Changed;
424739174c4SStanislav Mekhanoshin }
425